1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s 5 6 7define void @v_shuffle_v3f16_v4f16__u_u_u(ptr addrspace(1) inreg %ptr) { 8; GFX9-LABEL: v_shuffle_v3f16_v4f16__u_u_u: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: s_setpc_b64 s[30:31] 12 %vec0 = call <4 x half> asm "; def $0", "=v"() 13 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> poison 14 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 15 ret void 16} 17 18define void @v_shuffle_v3f16_v4f16__0_u_u(ptr addrspace(1) inreg %ptr) { 19; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_u_u: 20; GFX900: ; %bb.0: 21; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX900-NEXT: v_mov_b32_e32 v2, 0 23; GFX900-NEXT: ;;#ASMSTART 24; GFX900-NEXT: ; def v[0:1] 25; GFX900-NEXT: ;;#ASMEND 26; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 27; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 28; GFX900-NEXT: s_waitcnt vmcnt(0) 29; GFX900-NEXT: s_setpc_b64 s[30:31] 30; 31; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_u_u: 32; GFX90A: ; %bb.0: 33; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 34; GFX90A-NEXT: v_mov_b32_e32 v2, 0 35; GFX90A-NEXT: ;;#ASMSTART 36; GFX90A-NEXT: ; def v[0:1] 37; GFX90A-NEXT: ;;#ASMEND 38; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 39; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 40; GFX90A-NEXT: s_waitcnt vmcnt(0) 41; GFX90A-NEXT: s_setpc_b64 s[30:31] 42; 43; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_u_u: 44; GFX940: ; %bb.0: 45; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 46; GFX940-NEXT: v_mov_b32_e32 v2, 0 47; GFX940-NEXT: ;;#ASMSTART 48; GFX940-NEXT: ; def v[0:1] 49; GFX940-NEXT: ;;#ASMEND 50; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 51; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 52; GFX940-NEXT: s_waitcnt vmcnt(0) 53; GFX940-NEXT: s_setpc_b64 s[30:31] 54 %vec0 = call <4 x half> asm "; def $0", "=v"() 55 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 56 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 57 ret void 58} 59 60define void @v_shuffle_v3f16_v4f16__1_u_u(ptr addrspace(1) inreg %ptr) { 61; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_u_u: 62; GFX900: ; %bb.0: 63; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 64; GFX900-NEXT: ;;#ASMSTART 65; GFX900-NEXT: ; def v[0:1] 66; GFX900-NEXT: ;;#ASMEND 67; GFX900-NEXT: v_mov_b32_e32 v2, 0 68; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 69; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 70; GFX900-NEXT: s_waitcnt vmcnt(0) 71; GFX900-NEXT: s_setpc_b64 s[30:31] 72; 73; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_u_u: 74; GFX90A: ; %bb.0: 75; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 76; GFX90A-NEXT: ;;#ASMSTART 77; GFX90A-NEXT: ; def v[0:1] 78; GFX90A-NEXT: ;;#ASMEND 79; GFX90A-NEXT: v_mov_b32_e32 v2, 0 80; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 81; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 82; GFX90A-NEXT: s_waitcnt vmcnt(0) 83; GFX90A-NEXT: s_setpc_b64 s[30:31] 84; 85; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_u_u: 86; GFX940: ; %bb.0: 87; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 88; GFX940-NEXT: ;;#ASMSTART 89; GFX940-NEXT: ; def v[0:1] 90; GFX940-NEXT: ;;#ASMEND 91; GFX940-NEXT: v_mov_b32_e32 v2, 0 92; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 93; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 94; GFX940-NEXT: s_waitcnt vmcnt(0) 95; GFX940-NEXT: s_setpc_b64 s[30:31] 96 %vec0 = call <4 x half> asm "; def $0", "=v"() 97 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 98 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 99 ret void 100} 101 102define void @v_shuffle_v3f16_v4f16__2_u_u(ptr addrspace(1) inreg %ptr) { 103; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_u_u: 104; GFX900: ; %bb.0: 105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 106; GFX900-NEXT: v_mov_b32_e32 v2, 0 107; GFX900-NEXT: ;;#ASMSTART 108; GFX900-NEXT: ; def v[0:1] 109; GFX900-NEXT: ;;#ASMEND 110; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 111; GFX900-NEXT: s_waitcnt vmcnt(0) 112; GFX900-NEXT: s_setpc_b64 s[30:31] 113; 114; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_u_u: 115; GFX90A: ; %bb.0: 116; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 117; GFX90A-NEXT: v_mov_b32_e32 v2, 0 118; GFX90A-NEXT: ;;#ASMSTART 119; GFX90A-NEXT: ; def v[0:1] 120; GFX90A-NEXT: ;;#ASMEND 121; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 122; GFX90A-NEXT: s_waitcnt vmcnt(0) 123; GFX90A-NEXT: s_setpc_b64 s[30:31] 124; 125; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_u_u: 126; GFX940: ; %bb.0: 127; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 128; GFX940-NEXT: v_mov_b32_e32 v2, 0 129; GFX940-NEXT: ;;#ASMSTART 130; GFX940-NEXT: ; def v[0:1] 131; GFX940-NEXT: ;;#ASMEND 132; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 133; GFX940-NEXT: s_waitcnt vmcnt(0) 134; GFX940-NEXT: s_setpc_b64 s[30:31] 135 %vec0 = call <4 x half> asm "; def $0", "=v"() 136 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 137 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 138 ret void 139} 140 141define void @v_shuffle_v3f16_v4f16__3_u_u(ptr addrspace(1) inreg %ptr) { 142; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_u_u: 143; GFX900: ; %bb.0: 144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 145; GFX900-NEXT: ;;#ASMSTART 146; GFX900-NEXT: ; def v[0:1] 147; GFX900-NEXT: ;;#ASMEND 148; GFX900-NEXT: v_mov_b32_e32 v2, 0 149; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 150; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 151; GFX900-NEXT: s_waitcnt vmcnt(0) 152; GFX900-NEXT: s_setpc_b64 s[30:31] 153; 154; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_u_u: 155; GFX90A: ; %bb.0: 156; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 157; GFX90A-NEXT: ;;#ASMSTART 158; GFX90A-NEXT: ; def v[0:1] 159; GFX90A-NEXT: ;;#ASMEND 160; GFX90A-NEXT: v_mov_b32_e32 v2, 0 161; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 162; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 163; GFX90A-NEXT: s_waitcnt vmcnt(0) 164; GFX90A-NEXT: s_setpc_b64 s[30:31] 165; 166; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_u_u: 167; GFX940: ; %bb.0: 168; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 169; GFX940-NEXT: ;;#ASMSTART 170; GFX940-NEXT: ; def v[0:1] 171; GFX940-NEXT: ;;#ASMEND 172; GFX940-NEXT: v_mov_b32_e32 v2, 0 173; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 174; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 175; GFX940-NEXT: s_waitcnt vmcnt(0) 176; GFX940-NEXT: s_setpc_b64 s[30:31] 177 %vec0 = call <4 x half> asm "; def $0", "=v"() 178 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 179 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 180 ret void 181} 182 183define void @v_shuffle_v3f16_v4f16__4_u_u(ptr addrspace(1) inreg %ptr) { 184; GFX9-LABEL: v_shuffle_v3f16_v4f16__4_u_u: 185; GFX9: ; %bb.0: 186; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 187; GFX9-NEXT: s_setpc_b64 s[30:31] 188 %vec0 = call <4 x half> asm "; def $0", "=v"() 189 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 190 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 191 ret void 192} 193 194define void @v_shuffle_v3f16_v4f16__5_u_u(ptr addrspace(1) inreg %ptr) { 195; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_u_u: 196; GFX900: ; %bb.0: 197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 198; GFX900-NEXT: ;;#ASMSTART 199; GFX900-NEXT: ; def v[0:1] 200; GFX900-NEXT: ;;#ASMEND 201; GFX900-NEXT: v_mov_b32_e32 v2, 0 202; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 203; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 204; GFX900-NEXT: s_waitcnt vmcnt(0) 205; GFX900-NEXT: s_setpc_b64 s[30:31] 206; 207; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_u_u: 208; GFX90A: ; %bb.0: 209; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 210; GFX90A-NEXT: ;;#ASMSTART 211; GFX90A-NEXT: ; def v[0:1] 212; GFX90A-NEXT: ;;#ASMEND 213; GFX90A-NEXT: v_mov_b32_e32 v2, 0 214; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 215; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 216; GFX90A-NEXT: s_waitcnt vmcnt(0) 217; GFX90A-NEXT: s_setpc_b64 s[30:31] 218; 219; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_u_u: 220; GFX940: ; %bb.0: 221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 222; GFX940-NEXT: ;;#ASMSTART 223; GFX940-NEXT: ; def v[0:1] 224; GFX940-NEXT: ;;#ASMEND 225; GFX940-NEXT: v_mov_b32_e32 v2, 0 226; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 227; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 228; GFX940-NEXT: s_waitcnt vmcnt(0) 229; GFX940-NEXT: s_setpc_b64 s[30:31] 230 %vec0 = call <4 x half> asm "; def $0", "=v"() 231 %vec1 = call <4 x half> asm "; def $0", "=v"() 232 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 233 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 234 ret void 235} 236 237define void @v_shuffle_v3f16_v4f16__6_u_u(ptr addrspace(1) inreg %ptr) { 238; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_u_u: 239; GFX900: ; %bb.0: 240; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 241; GFX900-NEXT: v_mov_b32_e32 v2, 0 242; GFX900-NEXT: ;;#ASMSTART 243; GFX900-NEXT: ; def v[0:1] 244; GFX900-NEXT: ;;#ASMEND 245; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 246; GFX900-NEXT: s_waitcnt vmcnt(0) 247; GFX900-NEXT: s_setpc_b64 s[30:31] 248; 249; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_u_u: 250; GFX90A: ; %bb.0: 251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 252; GFX90A-NEXT: v_mov_b32_e32 v2, 0 253; GFX90A-NEXT: ;;#ASMSTART 254; GFX90A-NEXT: ; def v[0:1] 255; GFX90A-NEXT: ;;#ASMEND 256; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 257; GFX90A-NEXT: s_waitcnt vmcnt(0) 258; GFX90A-NEXT: s_setpc_b64 s[30:31] 259; 260; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_u_u: 261; GFX940: ; %bb.0: 262; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 263; GFX940-NEXT: v_mov_b32_e32 v2, 0 264; GFX940-NEXT: ;;#ASMSTART 265; GFX940-NEXT: ; def v[0:1] 266; GFX940-NEXT: ;;#ASMEND 267; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 268; GFX940-NEXT: s_waitcnt vmcnt(0) 269; GFX940-NEXT: s_setpc_b64 s[30:31] 270 %vec0 = call <4 x half> asm "; def $0", "=v"() 271 %vec1 = call <4 x half> asm "; def $0", "=v"() 272 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 273 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 274 ret void 275} 276 277define void @v_shuffle_v3f16_v4f16__7_u_u(ptr addrspace(1) inreg %ptr) { 278; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_u: 279; GFX900: ; %bb.0: 280; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 281; GFX900-NEXT: ;;#ASMSTART 282; GFX900-NEXT: ; def v[0:1] 283; GFX900-NEXT: ;;#ASMEND 284; GFX900-NEXT: v_mov_b32_e32 v2, 0 285; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 286; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 287; GFX900-NEXT: s_waitcnt vmcnt(0) 288; GFX900-NEXT: s_setpc_b64 s[30:31] 289; 290; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_u: 291; GFX90A: ; %bb.0: 292; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 293; GFX90A-NEXT: ;;#ASMSTART 294; GFX90A-NEXT: ; def v[0:1] 295; GFX90A-NEXT: ;;#ASMEND 296; GFX90A-NEXT: v_mov_b32_e32 v2, 0 297; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 298; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 299; GFX90A-NEXT: s_waitcnt vmcnt(0) 300; GFX90A-NEXT: s_setpc_b64 s[30:31] 301; 302; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_u: 303; GFX940: ; %bb.0: 304; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 305; GFX940-NEXT: ;;#ASMSTART 306; GFX940-NEXT: ; def v[0:1] 307; GFX940-NEXT: ;;#ASMEND 308; GFX940-NEXT: v_mov_b32_e32 v2, 0 309; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 310; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 311; GFX940-NEXT: s_waitcnt vmcnt(0) 312; GFX940-NEXT: s_setpc_b64 s[30:31] 313 %vec0 = call <4 x half> asm "; def $0", "=v"() 314 %vec1 = call <4 x half> asm "; def $0", "=v"() 315 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 316 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 317 ret void 318} 319 320define void @v_shuffle_v3f16_v4f16__7_0_u(ptr addrspace(1) inreg %ptr) { 321; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_u: 322; GFX900: ; %bb.0: 323; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 324; GFX900-NEXT: ;;#ASMSTART 325; GFX900-NEXT: ; def v[0:1] 326; GFX900-NEXT: ;;#ASMEND 327; GFX900-NEXT: v_mov_b32_e32 v3, 0 328; GFX900-NEXT: ;;#ASMSTART 329; GFX900-NEXT: ; def v[1:2] 330; GFX900-NEXT: ;;#ASMEND 331; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 332; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 333; GFX900-NEXT: s_waitcnt vmcnt(0) 334; GFX900-NEXT: s_setpc_b64 s[30:31] 335; 336; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_u: 337; GFX90A: ; %bb.0: 338; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 339; GFX90A-NEXT: ;;#ASMSTART 340; GFX90A-NEXT: ; def v[0:1] 341; GFX90A-NEXT: ;;#ASMEND 342; GFX90A-NEXT: v_mov_b32_e32 v4, 0 343; GFX90A-NEXT: ;;#ASMSTART 344; GFX90A-NEXT: ; def v[2:3] 345; GFX90A-NEXT: ;;#ASMEND 346; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 347; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 348; GFX90A-NEXT: s_waitcnt vmcnt(0) 349; GFX90A-NEXT: s_setpc_b64 s[30:31] 350; 351; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_u: 352; GFX940: ; %bb.0: 353; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 354; GFX940-NEXT: ;;#ASMSTART 355; GFX940-NEXT: ; def v[0:1] 356; GFX940-NEXT: ;;#ASMEND 357; GFX940-NEXT: v_mov_b32_e32 v4, 0 358; GFX940-NEXT: ;;#ASMSTART 359; GFX940-NEXT: ; def v[2:3] 360; GFX940-NEXT: ;;#ASMEND 361; GFX940-NEXT: s_nop 0 362; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 363; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 364; GFX940-NEXT: s_waitcnt vmcnt(0) 365; GFX940-NEXT: s_setpc_b64 s[30:31] 366 %vec0 = call <4 x half> asm "; def $0", "=v"() 367 %vec1 = call <4 x half> asm "; def $0", "=v"() 368 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 369 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 370 ret void 371} 372 373define void @v_shuffle_v3f16_v4f16__7_1_u(ptr addrspace(1) inreg %ptr) { 374; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_u: 375; GFX900: ; %bb.0: 376; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 377; GFX900-NEXT: ;;#ASMSTART 378; GFX900-NEXT: ; def v[0:1] 379; GFX900-NEXT: ;;#ASMEND 380; GFX900-NEXT: s_mov_b32 s4, 0x7060302 381; GFX900-NEXT: v_mov_b32_e32 v3, 0 382; GFX900-NEXT: ;;#ASMSTART 383; GFX900-NEXT: ; def v[1:2] 384; GFX900-NEXT: ;;#ASMEND 385; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 386; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 387; GFX900-NEXT: s_waitcnt vmcnt(0) 388; GFX900-NEXT: s_setpc_b64 s[30:31] 389; 390; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_u: 391; GFX90A: ; %bb.0: 392; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 393; GFX90A-NEXT: ;;#ASMSTART 394; GFX90A-NEXT: ; def v[0:1] 395; GFX90A-NEXT: ;;#ASMEND 396; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 397; GFX90A-NEXT: v_mov_b32_e32 v4, 0 398; GFX90A-NEXT: ;;#ASMSTART 399; GFX90A-NEXT: ; def v[2:3] 400; GFX90A-NEXT: ;;#ASMEND 401; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 402; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 403; GFX90A-NEXT: s_waitcnt vmcnt(0) 404; GFX90A-NEXT: s_setpc_b64 s[30:31] 405; 406; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_u: 407; GFX940: ; %bb.0: 408; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 409; GFX940-NEXT: ;;#ASMSTART 410; GFX940-NEXT: ; def v[0:1] 411; GFX940-NEXT: ;;#ASMEND 412; GFX940-NEXT: s_mov_b32 s2, 0x7060302 413; GFX940-NEXT: v_mov_b32_e32 v4, 0 414; GFX940-NEXT: ;;#ASMSTART 415; GFX940-NEXT: ; def v[2:3] 416; GFX940-NEXT: ;;#ASMEND 417; GFX940-NEXT: s_nop 0 418; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 419; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 420; GFX940-NEXT: s_waitcnt vmcnt(0) 421; GFX940-NEXT: s_setpc_b64 s[30:31] 422 %vec0 = call <4 x half> asm "; def $0", "=v"() 423 %vec1 = call <4 x half> asm "; def $0", "=v"() 424 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 425 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 426 ret void 427} 428 429define void @v_shuffle_v3f16_v4f16__7_2_u(ptr addrspace(1) inreg %ptr) { 430; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_u: 431; GFX900: ; %bb.0: 432; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 433; GFX900-NEXT: ;;#ASMSTART 434; GFX900-NEXT: ; def v[0:1] 435; GFX900-NEXT: ;;#ASMEND 436; GFX900-NEXT: v_mov_b32_e32 v4, 0 437; GFX900-NEXT: ;;#ASMSTART 438; GFX900-NEXT: ; def v[2:3] 439; GFX900-NEXT: ;;#ASMEND 440; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 441; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 442; GFX900-NEXT: s_waitcnt vmcnt(0) 443; GFX900-NEXT: s_setpc_b64 s[30:31] 444; 445; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_u: 446; GFX90A: ; %bb.0: 447; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 448; GFX90A-NEXT: ;;#ASMSTART 449; GFX90A-NEXT: ; def v[0:1] 450; GFX90A-NEXT: ;;#ASMEND 451; GFX90A-NEXT: v_mov_b32_e32 v4, 0 452; GFX90A-NEXT: ;;#ASMSTART 453; GFX90A-NEXT: ; def v[2:3] 454; GFX90A-NEXT: ;;#ASMEND 455; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 456; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 457; GFX90A-NEXT: s_waitcnt vmcnt(0) 458; GFX90A-NEXT: s_setpc_b64 s[30:31] 459; 460; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_u: 461; GFX940: ; %bb.0: 462; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 463; GFX940-NEXT: ;;#ASMSTART 464; GFX940-NEXT: ; def v[0:1] 465; GFX940-NEXT: ;;#ASMEND 466; GFX940-NEXT: v_mov_b32_e32 v4, 0 467; GFX940-NEXT: ;;#ASMSTART 468; GFX940-NEXT: ; def v[2:3] 469; GFX940-NEXT: ;;#ASMEND 470; GFX940-NEXT: s_nop 0 471; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 472; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 473; GFX940-NEXT: s_waitcnt vmcnt(0) 474; GFX940-NEXT: s_setpc_b64 s[30:31] 475 %vec0 = call <4 x half> asm "; def $0", "=v"() 476 %vec1 = call <4 x half> asm "; def $0", "=v"() 477 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 478 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 479 ret void 480} 481 482define void @v_shuffle_v3f16_v4f16__7_3_u(ptr addrspace(1) inreg %ptr) { 483; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_u: 484; GFX900: ; %bb.0: 485; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 486; GFX900-NEXT: ;;#ASMSTART 487; GFX900-NEXT: ; def v[0:1] 488; GFX900-NEXT: ;;#ASMEND 489; GFX900-NEXT: s_mov_b32 s4, 0x7060302 490; GFX900-NEXT: v_mov_b32_e32 v4, 0 491; GFX900-NEXT: ;;#ASMSTART 492; GFX900-NEXT: ; def v[2:3] 493; GFX900-NEXT: ;;#ASMEND 494; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 495; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 496; GFX900-NEXT: s_waitcnt vmcnt(0) 497; GFX900-NEXT: s_setpc_b64 s[30:31] 498; 499; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_u: 500; GFX90A: ; %bb.0: 501; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 502; GFX90A-NEXT: ;;#ASMSTART 503; GFX90A-NEXT: ; def v[0:1] 504; GFX90A-NEXT: ;;#ASMEND 505; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 506; GFX90A-NEXT: v_mov_b32_e32 v4, 0 507; GFX90A-NEXT: ;;#ASMSTART 508; GFX90A-NEXT: ; def v[2:3] 509; GFX90A-NEXT: ;;#ASMEND 510; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 511; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 512; GFX90A-NEXT: s_waitcnt vmcnt(0) 513; GFX90A-NEXT: s_setpc_b64 s[30:31] 514; 515; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_u: 516; GFX940: ; %bb.0: 517; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 518; GFX940-NEXT: ;;#ASMSTART 519; GFX940-NEXT: ; def v[0:1] 520; GFX940-NEXT: ;;#ASMEND 521; GFX940-NEXT: s_mov_b32 s2, 0x7060302 522; GFX940-NEXT: v_mov_b32_e32 v4, 0 523; GFX940-NEXT: ;;#ASMSTART 524; GFX940-NEXT: ; def v[2:3] 525; GFX940-NEXT: ;;#ASMEND 526; GFX940-NEXT: s_nop 0 527; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 528; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 529; GFX940-NEXT: s_waitcnt vmcnt(0) 530; GFX940-NEXT: s_setpc_b64 s[30:31] 531 %vec0 = call <4 x half> asm "; def $0", "=v"() 532 %vec1 = call <4 x half> asm "; def $0", "=v"() 533 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 534 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 535 ret void 536} 537 538define void @v_shuffle_v3f16_v4f16__7_4_u(ptr addrspace(1) inreg %ptr) { 539; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_u: 540; GFX900: ; %bb.0: 541; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 542; GFX900-NEXT: ;;#ASMSTART 543; GFX900-NEXT: ; def v[0:1] 544; GFX900-NEXT: ;;#ASMEND 545; GFX900-NEXT: v_mov_b32_e32 v2, 0 546; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 547; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 548; GFX900-NEXT: s_waitcnt vmcnt(0) 549; GFX900-NEXT: s_setpc_b64 s[30:31] 550; 551; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_u: 552; GFX90A: ; %bb.0: 553; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 554; GFX90A-NEXT: ;;#ASMSTART 555; GFX90A-NEXT: ; def v[0:1] 556; GFX90A-NEXT: ;;#ASMEND 557; GFX90A-NEXT: v_mov_b32_e32 v2, 0 558; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 559; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 560; GFX90A-NEXT: s_waitcnt vmcnt(0) 561; GFX90A-NEXT: s_setpc_b64 s[30:31] 562; 563; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_u: 564; GFX940: ; %bb.0: 565; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 566; GFX940-NEXT: ;;#ASMSTART 567; GFX940-NEXT: ; def v[0:1] 568; GFX940-NEXT: ;;#ASMEND 569; GFX940-NEXT: v_mov_b32_e32 v2, 0 570; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 571; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 572; GFX940-NEXT: s_waitcnt vmcnt(0) 573; GFX940-NEXT: s_setpc_b64 s[30:31] 574 %vec0 = call <4 x half> asm "; def $0", "=v"() 575 %vec1 = call <4 x half> asm "; def $0", "=v"() 576 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 577 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 578 ret void 579} 580 581define void @v_shuffle_v3f16_v4f16__7_5_u(ptr addrspace(1) inreg %ptr) { 582; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_u: 583; GFX900: ; %bb.0: 584; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 585; GFX900-NEXT: ;;#ASMSTART 586; GFX900-NEXT: ; def v[0:1] 587; GFX900-NEXT: ;;#ASMEND 588; GFX900-NEXT: s_mov_b32 s4, 0x7060302 589; GFX900-NEXT: v_mov_b32_e32 v2, 0 590; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 591; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 592; GFX900-NEXT: s_waitcnt vmcnt(0) 593; GFX900-NEXT: s_setpc_b64 s[30:31] 594; 595; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_u: 596; GFX90A: ; %bb.0: 597; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 598; GFX90A-NEXT: ;;#ASMSTART 599; GFX90A-NEXT: ; def v[0:1] 600; GFX90A-NEXT: ;;#ASMEND 601; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 602; GFX90A-NEXT: v_mov_b32_e32 v2, 0 603; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 604; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 605; GFX90A-NEXT: s_waitcnt vmcnt(0) 606; GFX90A-NEXT: s_setpc_b64 s[30:31] 607; 608; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_u: 609; GFX940: ; %bb.0: 610; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 611; GFX940-NEXT: ;;#ASMSTART 612; GFX940-NEXT: ; def v[0:1] 613; GFX940-NEXT: ;;#ASMEND 614; GFX940-NEXT: s_mov_b32 s2, 0x7060302 615; GFX940-NEXT: v_mov_b32_e32 v2, 0 616; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 617; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 618; GFX940-NEXT: s_waitcnt vmcnt(0) 619; GFX940-NEXT: s_setpc_b64 s[30:31] 620 %vec0 = call <4 x half> asm "; def $0", "=v"() 621 %vec1 = call <4 x half> asm "; def $0", "=v"() 622 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 poison> 623 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 624 ret void 625} 626 627define void @v_shuffle_v3f16_v4f16__7_6_u(ptr addrspace(1) inreg %ptr) { 628; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_u: 629; GFX900: ; %bb.0: 630; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 631; GFX900-NEXT: ;;#ASMSTART 632; GFX900-NEXT: ; def v[0:1] 633; GFX900-NEXT: ;;#ASMEND 634; GFX900-NEXT: v_mov_b32_e32 v2, 0 635; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 636; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 637; GFX900-NEXT: s_waitcnt vmcnt(0) 638; GFX900-NEXT: s_setpc_b64 s[30:31] 639; 640; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_u: 641; GFX90A: ; %bb.0: 642; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 643; GFX90A-NEXT: ;;#ASMSTART 644; GFX90A-NEXT: ; def v[0:1] 645; GFX90A-NEXT: ;;#ASMEND 646; GFX90A-NEXT: v_mov_b32_e32 v2, 0 647; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 648; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 649; GFX90A-NEXT: s_waitcnt vmcnt(0) 650; GFX90A-NEXT: s_setpc_b64 s[30:31] 651; 652; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_u: 653; GFX940: ; %bb.0: 654; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 655; GFX940-NEXT: ;;#ASMSTART 656; GFX940-NEXT: ; def v[0:1] 657; GFX940-NEXT: ;;#ASMEND 658; GFX940-NEXT: v_mov_b32_e32 v2, 0 659; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 660; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 661; GFX940-NEXT: s_waitcnt vmcnt(0) 662; GFX940-NEXT: s_setpc_b64 s[30:31] 663 %vec0 = call <4 x half> asm "; def $0", "=v"() 664 %vec1 = call <4 x half> asm "; def $0", "=v"() 665 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 666 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 667 ret void 668} 669 670define void @v_shuffle_v3f16_v4f16__7_7_u(ptr addrspace(1) inreg %ptr) { 671; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_u: 672; GFX900: ; %bb.0: 673; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 674; GFX900-NEXT: ;;#ASMSTART 675; GFX900-NEXT: ; def v[0:1] 676; GFX900-NEXT: ;;#ASMEND 677; GFX900-NEXT: s_mov_b32 s4, 0x7060302 678; GFX900-NEXT: v_mov_b32_e32 v2, 0 679; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 680; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 681; GFX900-NEXT: s_waitcnt vmcnt(0) 682; GFX900-NEXT: s_setpc_b64 s[30:31] 683; 684; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_u: 685; GFX90A: ; %bb.0: 686; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 687; GFX90A-NEXT: ;;#ASMSTART 688; GFX90A-NEXT: ; def v[0:1] 689; GFX90A-NEXT: ;;#ASMEND 690; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 691; GFX90A-NEXT: v_mov_b32_e32 v2, 0 692; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 693; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 694; GFX90A-NEXT: s_waitcnt vmcnt(0) 695; GFX90A-NEXT: s_setpc_b64 s[30:31] 696; 697; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_u: 698; GFX940: ; %bb.0: 699; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 700; GFX940-NEXT: ;;#ASMSTART 701; GFX940-NEXT: ; def v[0:1] 702; GFX940-NEXT: ;;#ASMEND 703; GFX940-NEXT: s_mov_b32 s2, 0x7060302 704; GFX940-NEXT: v_mov_b32_e32 v2, 0 705; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 706; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 707; GFX940-NEXT: s_waitcnt vmcnt(0) 708; GFX940-NEXT: s_setpc_b64 s[30:31] 709 %vec0 = call <4 x half> asm "; def $0", "=v"() 710 %vec1 = call <4 x half> asm "; def $0", "=v"() 711 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 712 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 713 ret void 714} 715 716define void @v_shuffle_v3f16_v4f16__7_7_0(ptr addrspace(1) inreg %ptr) { 717; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_0: 718; GFX900: ; %bb.0: 719; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 720; GFX900-NEXT: ;;#ASMSTART 721; GFX900-NEXT: ; def v[0:1] 722; GFX900-NEXT: ;;#ASMEND 723; GFX900-NEXT: v_mov_b32_e32 v3, 0 724; GFX900-NEXT: ;;#ASMSTART 725; GFX900-NEXT: ; def v[1:2] 726; GFX900-NEXT: ;;#ASMEND 727; GFX900-NEXT: s_mov_b32 s4, 0x7060302 728; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 729; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 730; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 731; GFX900-NEXT: s_waitcnt vmcnt(0) 732; GFX900-NEXT: s_setpc_b64 s[30:31] 733; 734; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_0: 735; GFX90A: ; %bb.0: 736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 737; GFX90A-NEXT: v_mov_b32_e32 v4, 0 738; GFX90A-NEXT: ;;#ASMSTART 739; GFX90A-NEXT: ; def v[0:1] 740; GFX90A-NEXT: ;;#ASMEND 741; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 742; GFX90A-NEXT: ;;#ASMSTART 743; GFX90A-NEXT: ; def v[2:3] 744; GFX90A-NEXT: ;;#ASMEND 745; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 746; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 747; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 748; GFX90A-NEXT: s_waitcnt vmcnt(0) 749; GFX90A-NEXT: s_setpc_b64 s[30:31] 750; 751; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_0: 752; GFX940: ; %bb.0: 753; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 754; GFX940-NEXT: v_mov_b32_e32 v4, 0 755; GFX940-NEXT: ;;#ASMSTART 756; GFX940-NEXT: ; def v[0:1] 757; GFX940-NEXT: ;;#ASMEND 758; GFX940-NEXT: s_mov_b32 s2, 0x7060302 759; GFX940-NEXT: ;;#ASMSTART 760; GFX940-NEXT: ; def v[2:3] 761; GFX940-NEXT: ;;#ASMEND 762; GFX940-NEXT: s_nop 0 763; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 764; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 765; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 766; GFX940-NEXT: s_waitcnt vmcnt(0) 767; GFX940-NEXT: s_setpc_b64 s[30:31] 768 %vec0 = call <4 x half> asm "; def $0", "=v"() 769 %vec1 = call <4 x half> asm "; def $0", "=v"() 770 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 771 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 772 ret void 773} 774 775define void @v_shuffle_v3f16_v4f16__7_7_1(ptr addrspace(1) inreg %ptr) { 776; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_1: 777; GFX900: ; %bb.0: 778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 779; GFX900-NEXT: ;;#ASMSTART 780; GFX900-NEXT: ; def v[0:1] 781; GFX900-NEXT: ;;#ASMEND 782; GFX900-NEXT: v_mov_b32_e32 v3, 0 783; GFX900-NEXT: ;;#ASMSTART 784; GFX900-NEXT: ; def v[1:2] 785; GFX900-NEXT: ;;#ASMEND 786; GFX900-NEXT: s_mov_b32 s4, 0x7060302 787; GFX900-NEXT: v_perm_b32 v1, v2, v2, s4 788; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 789; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 790; GFX900-NEXT: s_waitcnt vmcnt(0) 791; GFX900-NEXT: s_setpc_b64 s[30:31] 792; 793; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_1: 794; GFX90A: ; %bb.0: 795; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 796; GFX90A-NEXT: v_mov_b32_e32 v4, 0 797; GFX90A-NEXT: ;;#ASMSTART 798; GFX90A-NEXT: ; def v[0:1] 799; GFX90A-NEXT: ;;#ASMEND 800; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 801; GFX90A-NEXT: ;;#ASMSTART 802; GFX90A-NEXT: ; def v[2:3] 803; GFX90A-NEXT: ;;#ASMEND 804; GFX90A-NEXT: v_perm_b32 v1, v3, v3, s4 805; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 806; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 807; GFX90A-NEXT: s_waitcnt vmcnt(0) 808; GFX90A-NEXT: s_setpc_b64 s[30:31] 809; 810; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_1: 811; GFX940: ; %bb.0: 812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 813; GFX940-NEXT: v_mov_b32_e32 v4, 0 814; GFX940-NEXT: ;;#ASMSTART 815; GFX940-NEXT: ; def v[0:1] 816; GFX940-NEXT: ;;#ASMEND 817; GFX940-NEXT: s_mov_b32 s2, 0x7060302 818; GFX940-NEXT: ;;#ASMSTART 819; GFX940-NEXT: ; def v[2:3] 820; GFX940-NEXT: ;;#ASMEND 821; GFX940-NEXT: s_nop 0 822; GFX940-NEXT: v_perm_b32 v1, v3, v3, s2 823; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 824; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 825; GFX940-NEXT: s_waitcnt vmcnt(0) 826; GFX940-NEXT: s_setpc_b64 s[30:31] 827 %vec0 = call <4 x half> asm "; def $0", "=v"() 828 %vec1 = call <4 x half> asm "; def $0", "=v"() 829 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 830 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 831 ret void 832} 833 834define void @v_shuffle_v3f16_v4f16__7_7_2(ptr addrspace(1) inreg %ptr) { 835; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_2: 836; GFX900: ; %bb.0: 837; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 838; GFX900-NEXT: v_mov_b32_e32 v4, 0 839; GFX900-NEXT: ;;#ASMSTART 840; GFX900-NEXT: ; def v[0:1] 841; GFX900-NEXT: ;;#ASMEND 842; GFX900-NEXT: s_mov_b32 s4, 0x7060302 843; GFX900-NEXT: ;;#ASMSTART 844; GFX900-NEXT: ; def v[2:3] 845; GFX900-NEXT: ;;#ASMEND 846; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 847; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 848; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 849; GFX900-NEXT: s_waitcnt vmcnt(0) 850; GFX900-NEXT: s_setpc_b64 s[30:31] 851; 852; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_2: 853; GFX90A: ; %bb.0: 854; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 855; GFX90A-NEXT: v_mov_b32_e32 v4, 0 856; GFX90A-NEXT: ;;#ASMSTART 857; GFX90A-NEXT: ; def v[0:1] 858; GFX90A-NEXT: ;;#ASMEND 859; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 860; GFX90A-NEXT: ;;#ASMSTART 861; GFX90A-NEXT: ; def v[2:3] 862; GFX90A-NEXT: ;;#ASMEND 863; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 864; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 865; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 866; GFX90A-NEXT: s_waitcnt vmcnt(0) 867; GFX90A-NEXT: s_setpc_b64 s[30:31] 868; 869; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_2: 870; GFX940: ; %bb.0: 871; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX940-NEXT: v_mov_b32_e32 v4, 0 873; GFX940-NEXT: ;;#ASMSTART 874; GFX940-NEXT: ; def v[0:1] 875; GFX940-NEXT: ;;#ASMEND 876; GFX940-NEXT: s_mov_b32 s2, 0x7060302 877; GFX940-NEXT: ;;#ASMSTART 878; GFX940-NEXT: ; def v[2:3] 879; GFX940-NEXT: ;;#ASMEND 880; GFX940-NEXT: s_nop 0 881; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 882; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 883; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 884; GFX940-NEXT: s_waitcnt vmcnt(0) 885; GFX940-NEXT: s_setpc_b64 s[30:31] 886 %vec0 = call <4 x half> asm "; def $0", "=v"() 887 %vec1 = call <4 x half> asm "; def $0", "=v"() 888 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 2> 889 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 890 ret void 891} 892 893define void @v_shuffle_v3f16_v4f16__7_7_3(ptr addrspace(1) inreg %ptr) { 894; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_3: 895; GFX900: ; %bb.0: 896; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 897; GFX900-NEXT: v_mov_b32_e32 v4, 0 898; GFX900-NEXT: ;;#ASMSTART 899; GFX900-NEXT: ; def v[0:1] 900; GFX900-NEXT: ;;#ASMEND 901; GFX900-NEXT: s_mov_b32 s4, 0x7060302 902; GFX900-NEXT: ;;#ASMSTART 903; GFX900-NEXT: ; def v[2:3] 904; GFX900-NEXT: ;;#ASMEND 905; GFX900-NEXT: v_perm_b32 v0, v3, v3, s4 906; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 907; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 908; GFX900-NEXT: s_waitcnt vmcnt(0) 909; GFX900-NEXT: s_setpc_b64 s[30:31] 910; 911; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_3: 912; GFX90A: ; %bb.0: 913; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 914; GFX90A-NEXT: v_mov_b32_e32 v4, 0 915; GFX90A-NEXT: ;;#ASMSTART 916; GFX90A-NEXT: ; def v[0:1] 917; GFX90A-NEXT: ;;#ASMEND 918; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 919; GFX90A-NEXT: ;;#ASMSTART 920; GFX90A-NEXT: ; def v[2:3] 921; GFX90A-NEXT: ;;#ASMEND 922; GFX90A-NEXT: v_perm_b32 v0, v3, v3, s4 923; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 924; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 925; GFX90A-NEXT: s_waitcnt vmcnt(0) 926; GFX90A-NEXT: s_setpc_b64 s[30:31] 927; 928; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_3: 929; GFX940: ; %bb.0: 930; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 931; GFX940-NEXT: v_mov_b32_e32 v4, 0 932; GFX940-NEXT: ;;#ASMSTART 933; GFX940-NEXT: ; def v[0:1] 934; GFX940-NEXT: ;;#ASMEND 935; GFX940-NEXT: s_mov_b32 s2, 0x7060302 936; GFX940-NEXT: ;;#ASMSTART 937; GFX940-NEXT: ; def v[2:3] 938; GFX940-NEXT: ;;#ASMEND 939; GFX940-NEXT: s_nop 0 940; GFX940-NEXT: v_perm_b32 v0, v3, v3, s2 941; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 942; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 943; GFX940-NEXT: s_waitcnt vmcnt(0) 944; GFX940-NEXT: s_setpc_b64 s[30:31] 945 %vec0 = call <4 x half> asm "; def $0", "=v"() 946 %vec1 = call <4 x half> asm "; def $0", "=v"() 947 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 948 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 949 ret void 950} 951 952define void @v_shuffle_v3f16_v4f16__7_7_4(ptr addrspace(1) inreg %ptr) { 953; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_4: 954; GFX900: ; %bb.0: 955; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 956; GFX900-NEXT: v_mov_b32_e32 v2, 0 957; GFX900-NEXT: ;;#ASMSTART 958; GFX900-NEXT: ; def v[0:1] 959; GFX900-NEXT: ;;#ASMEND 960; GFX900-NEXT: s_mov_b32 s4, 0x7060302 961; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 962; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 963; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 964; GFX900-NEXT: s_waitcnt vmcnt(0) 965; GFX900-NEXT: s_setpc_b64 s[30:31] 966; 967; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_4: 968; GFX90A: ; %bb.0: 969; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 970; GFX90A-NEXT: v_mov_b32_e32 v2, 0 971; GFX90A-NEXT: ;;#ASMSTART 972; GFX90A-NEXT: ; def v[0:1] 973; GFX90A-NEXT: ;;#ASMEND 974; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 975; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 976; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 977; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 978; GFX90A-NEXT: s_waitcnt vmcnt(0) 979; GFX90A-NEXT: s_setpc_b64 s[30:31] 980; 981; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_4: 982; GFX940: ; %bb.0: 983; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 984; GFX940-NEXT: v_mov_b32_e32 v2, 0 985; GFX940-NEXT: ;;#ASMSTART 986; GFX940-NEXT: ; def v[0:1] 987; GFX940-NEXT: ;;#ASMEND 988; GFX940-NEXT: s_mov_b32 s2, 0x7060302 989; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 990; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 991; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 992; GFX940-NEXT: s_waitcnt vmcnt(0) 993; GFX940-NEXT: s_setpc_b64 s[30:31] 994 %vec0 = call <4 x half> asm "; def $0", "=v"() 995 %vec1 = call <4 x half> asm "; def $0", "=v"() 996 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 997 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 998 ret void 999} 1000 1001define void @v_shuffle_v3f16_v4f16__7_7_5(ptr addrspace(1) inreg %ptr) { 1002; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_5: 1003; GFX900: ; %bb.0: 1004; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1005; GFX900-NEXT: v_mov_b32_e32 v2, 0 1006; GFX900-NEXT: ;;#ASMSTART 1007; GFX900-NEXT: ; def v[0:1] 1008; GFX900-NEXT: ;;#ASMEND 1009; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1010; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1011; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1012; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1013; GFX900-NEXT: s_waitcnt vmcnt(0) 1014; GFX900-NEXT: s_setpc_b64 s[30:31] 1015; 1016; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_5: 1017; GFX90A: ; %bb.0: 1018; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1019; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1020; GFX90A-NEXT: ;;#ASMSTART 1021; GFX90A-NEXT: ; def v[0:1] 1022; GFX90A-NEXT: ;;#ASMEND 1023; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1024; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 1025; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 1026; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1027; GFX90A-NEXT: s_waitcnt vmcnt(0) 1028; GFX90A-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_5: 1031; GFX940: ; %bb.0: 1032; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX940-NEXT: v_mov_b32_e32 v2, 0 1034; GFX940-NEXT: ;;#ASMSTART 1035; GFX940-NEXT: ; def v[0:1] 1036; GFX940-NEXT: ;;#ASMEND 1037; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1038; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 1039; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 1040; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1041; GFX940-NEXT: s_waitcnt vmcnt(0) 1042; GFX940-NEXT: s_setpc_b64 s[30:31] 1043 %vec0 = call <4 x half> asm "; def $0", "=v"() 1044 %vec1 = call <4 x half> asm "; def $0", "=v"() 1045 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 1046 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1047 ret void 1048} 1049 1050define void @v_shuffle_v3f16_v4f16__7_7_6(ptr addrspace(1) inreg %ptr) { 1051; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_6: 1052; GFX900: ; %bb.0: 1053; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1054; GFX900-NEXT: v_mov_b32_e32 v2, 0 1055; GFX900-NEXT: ;;#ASMSTART 1056; GFX900-NEXT: ; def v[0:1] 1057; GFX900-NEXT: ;;#ASMEND 1058; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1059; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1060; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 1061; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1062; GFX900-NEXT: s_waitcnt vmcnt(0) 1063; GFX900-NEXT: s_setpc_b64 s[30:31] 1064; 1065; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_6: 1066; GFX90A: ; %bb.0: 1067; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1068; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1069; GFX90A-NEXT: ;;#ASMSTART 1070; GFX90A-NEXT: ; def v[0:1] 1071; GFX90A-NEXT: ;;#ASMEND 1072; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1073; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1074; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 1075; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1076; GFX90A-NEXT: s_waitcnt vmcnt(0) 1077; GFX90A-NEXT: s_setpc_b64 s[30:31] 1078; 1079; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_6: 1080; GFX940: ; %bb.0: 1081; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1082; GFX940-NEXT: v_mov_b32_e32 v2, 0 1083; GFX940-NEXT: ;;#ASMSTART 1084; GFX940-NEXT: ; def v[0:1] 1085; GFX940-NEXT: ;;#ASMEND 1086; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1087; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1088; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 1089; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1090; GFX940-NEXT: s_waitcnt vmcnt(0) 1091; GFX940-NEXT: s_setpc_b64 s[30:31] 1092 %vec0 = call <4 x half> asm "; def $0", "=v"() 1093 %vec1 = call <4 x half> asm "; def $0", "=v"() 1094 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 1095 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1096 ret void 1097} 1098 1099define void @v_shuffle_v3f16_v4f16__7_7_7(ptr addrspace(1) inreg %ptr) { 1100; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_7_7: 1101; GFX900: ; %bb.0: 1102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1103; GFX900-NEXT: ;;#ASMSTART 1104; GFX900-NEXT: ; def v[0:1] 1105; GFX900-NEXT: ;;#ASMEND 1106; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1107; GFX900-NEXT: v_mov_b32_e32 v2, 0 1108; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1109; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 1110; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1111; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1112; GFX900-NEXT: s_waitcnt vmcnt(0) 1113; GFX900-NEXT: s_setpc_b64 s[30:31] 1114; 1115; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_7_7: 1116; GFX90A: ; %bb.0: 1117; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1118; GFX90A-NEXT: ;;#ASMSTART 1119; GFX90A-NEXT: ; def v[0:1] 1120; GFX90A-NEXT: ;;#ASMEND 1121; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1122; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1123; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1124; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 1125; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1126; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1127; GFX90A-NEXT: s_waitcnt vmcnt(0) 1128; GFX90A-NEXT: s_setpc_b64 s[30:31] 1129; 1130; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_7_7: 1131; GFX940: ; %bb.0: 1132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1133; GFX940-NEXT: ;;#ASMSTART 1134; GFX940-NEXT: ; def v[0:1] 1135; GFX940-NEXT: ;;#ASMEND 1136; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1137; GFX940-NEXT: v_mov_b32_e32 v2, 0 1138; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 1139; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 1140; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1141; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1142; GFX940-NEXT: s_waitcnt vmcnt(0) 1143; GFX940-NEXT: s_setpc_b64 s[30:31] 1144 %vec0 = call <4 x half> asm "; def $0", "=v"() 1145 %vec1 = call <4 x half> asm "; def $0", "=v"() 1146 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 1147 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1148 ret void 1149} 1150 1151define void @v_shuffle_v3f16_v4f16__u_0_0(ptr addrspace(1) inreg %ptr) { 1152; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_0_0: 1153; GFX900: ; %bb.0: 1154; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1155; GFX900-NEXT: v_mov_b32_e32 v2, 0 1156; GFX900-NEXT: ;;#ASMSTART 1157; GFX900-NEXT: ; def v[0:1] 1158; GFX900-NEXT: ;;#ASMEND 1159; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1160; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1161; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1162; GFX900-NEXT: s_waitcnt vmcnt(0) 1163; GFX900-NEXT: s_setpc_b64 s[30:31] 1164; 1165; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_0_0: 1166; GFX90A: ; %bb.0: 1167; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1168; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1169; GFX90A-NEXT: ;;#ASMSTART 1170; GFX90A-NEXT: ; def v[0:1] 1171; GFX90A-NEXT: ;;#ASMEND 1172; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1173; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1174; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1175; GFX90A-NEXT: s_waitcnt vmcnt(0) 1176; GFX90A-NEXT: s_setpc_b64 s[30:31] 1177; 1178; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_0_0: 1179; GFX940: ; %bb.0: 1180; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1181; GFX940-NEXT: v_mov_b32_e32 v2, 0 1182; GFX940-NEXT: ;;#ASMSTART 1183; GFX940-NEXT: ; def v[0:1] 1184; GFX940-NEXT: ;;#ASMEND 1185; GFX940-NEXT: s_nop 0 1186; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1187; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1188; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1189; GFX940-NEXT: s_waitcnt vmcnt(0) 1190; GFX940-NEXT: s_setpc_b64 s[30:31] 1191 %vec0 = call <4 x half> asm "; def $0", "=v"() 1192 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 0, i32 0> 1193 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1194 ret void 1195} 1196 1197define void @v_shuffle_v3f16_v4f16__0_0_0(ptr addrspace(1) inreg %ptr) { 1198; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_0_0: 1199; GFX900: ; %bb.0: 1200; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1201; GFX900-NEXT: v_mov_b32_e32 v2, 0 1202; GFX900-NEXT: ;;#ASMSTART 1203; GFX900-NEXT: ; def v[0:1] 1204; GFX900-NEXT: ;;#ASMEND 1205; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1206; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 1207; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1208; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1209; GFX900-NEXT: s_waitcnt vmcnt(0) 1210; GFX900-NEXT: s_setpc_b64 s[30:31] 1211; 1212; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_0_0: 1213; GFX90A: ; %bb.0: 1214; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1215; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1216; GFX90A-NEXT: ;;#ASMSTART 1217; GFX90A-NEXT: ; def v[0:1] 1218; GFX90A-NEXT: ;;#ASMEND 1219; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1220; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 1221; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1222; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1223; GFX90A-NEXT: s_waitcnt vmcnt(0) 1224; GFX90A-NEXT: s_setpc_b64 s[30:31] 1225; 1226; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_0_0: 1227; GFX940: ; %bb.0: 1228; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1229; GFX940-NEXT: v_mov_b32_e32 v2, 0 1230; GFX940-NEXT: ;;#ASMSTART 1231; GFX940-NEXT: ; def v[0:1] 1232; GFX940-NEXT: ;;#ASMEND 1233; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1234; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 1235; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1236; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1237; GFX940-NEXT: s_waitcnt vmcnt(0) 1238; GFX940-NEXT: s_setpc_b64 s[30:31] 1239 %vec0 = call <4 x half> asm "; def $0", "=v"() 1240 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer 1241 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1242 ret void 1243} 1244 1245define void @v_shuffle_v3f16_v4f16__1_0_0(ptr addrspace(1) inreg %ptr) { 1246; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_0_0: 1247; GFX900: ; %bb.0: 1248; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1249; GFX900-NEXT: v_mov_b32_e32 v2, 0 1250; GFX900-NEXT: ;;#ASMSTART 1251; GFX900-NEXT: ; def v[0:1] 1252; GFX900-NEXT: ;;#ASMEND 1253; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 1254; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1255; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1256; GFX900-NEXT: s_waitcnt vmcnt(0) 1257; GFX900-NEXT: s_setpc_b64 s[30:31] 1258; 1259; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_0_0: 1260; GFX90A: ; %bb.0: 1261; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1262; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1263; GFX90A-NEXT: ;;#ASMSTART 1264; GFX90A-NEXT: ; def v[0:1] 1265; GFX90A-NEXT: ;;#ASMEND 1266; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 1267; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1268; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1269; GFX90A-NEXT: s_waitcnt vmcnt(0) 1270; GFX90A-NEXT: s_setpc_b64 s[30:31] 1271; 1272; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_0_0: 1273; GFX940: ; %bb.0: 1274; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1275; GFX940-NEXT: v_mov_b32_e32 v2, 0 1276; GFX940-NEXT: ;;#ASMSTART 1277; GFX940-NEXT: ; def v[0:1] 1278; GFX940-NEXT: ;;#ASMEND 1279; GFX940-NEXT: s_nop 0 1280; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 1281; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1282; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1283; GFX940-NEXT: s_waitcnt vmcnt(0) 1284; GFX940-NEXT: s_setpc_b64 s[30:31] 1285 %vec0 = call <4 x half> asm "; def $0", "=v"() 1286 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 0, i32 0> 1287 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1288 ret void 1289} 1290 1291define void @v_shuffle_v3f16_v4f16__2_0_0(ptr addrspace(1) inreg %ptr) { 1292; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_0_0: 1293; GFX900: ; %bb.0: 1294; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1295; GFX900-NEXT: v_mov_b32_e32 v2, 0 1296; GFX900-NEXT: ;;#ASMSTART 1297; GFX900-NEXT: ; def v[0:1] 1298; GFX900-NEXT: ;;#ASMEND 1299; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1300; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 1301; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1302; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1303; GFX900-NEXT: s_waitcnt vmcnt(0) 1304; GFX900-NEXT: s_setpc_b64 s[30:31] 1305; 1306; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_0_0: 1307; GFX90A: ; %bb.0: 1308; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1309; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1310; GFX90A-NEXT: ;;#ASMSTART 1311; GFX90A-NEXT: ; def v[0:1] 1312; GFX90A-NEXT: ;;#ASMEND 1313; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1314; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 1315; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1316; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1317; GFX90A-NEXT: s_waitcnt vmcnt(0) 1318; GFX90A-NEXT: s_setpc_b64 s[30:31] 1319; 1320; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_0_0: 1321; GFX940: ; %bb.0: 1322; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1323; GFX940-NEXT: v_mov_b32_e32 v2, 0 1324; GFX940-NEXT: ;;#ASMSTART 1325; GFX940-NEXT: ; def v[0:1] 1326; GFX940-NEXT: ;;#ASMEND 1327; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1328; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 1329; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1330; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1331; GFX940-NEXT: s_waitcnt vmcnt(0) 1332; GFX940-NEXT: s_setpc_b64 s[30:31] 1333 %vec0 = call <4 x half> asm "; def $0", "=v"() 1334 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 0, i32 0> 1335 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1336 ret void 1337} 1338 1339define void @v_shuffle_v3f16_v4f16__3_0_0(ptr addrspace(1) inreg %ptr) { 1340; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_0_0: 1341; GFX900: ; %bb.0: 1342; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1343; GFX900-NEXT: v_mov_b32_e32 v2, 0 1344; GFX900-NEXT: ;;#ASMSTART 1345; GFX900-NEXT: ; def v[0:1] 1346; GFX900-NEXT: ;;#ASMEND 1347; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1348; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1349; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1350; GFX900-NEXT: s_waitcnt vmcnt(0) 1351; GFX900-NEXT: s_setpc_b64 s[30:31] 1352; 1353; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_0_0: 1354; GFX90A: ; %bb.0: 1355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1356; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1357; GFX90A-NEXT: ;;#ASMSTART 1358; GFX90A-NEXT: ; def v[0:1] 1359; GFX90A-NEXT: ;;#ASMEND 1360; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 1361; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1362; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1363; GFX90A-NEXT: s_waitcnt vmcnt(0) 1364; GFX90A-NEXT: s_setpc_b64 s[30:31] 1365; 1366; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_0_0: 1367; GFX940: ; %bb.0: 1368; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1369; GFX940-NEXT: v_mov_b32_e32 v2, 0 1370; GFX940-NEXT: ;;#ASMSTART 1371; GFX940-NEXT: ; def v[0:1] 1372; GFX940-NEXT: ;;#ASMEND 1373; GFX940-NEXT: s_nop 0 1374; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 1375; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1376; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1377; GFX940-NEXT: s_waitcnt vmcnt(0) 1378; GFX940-NEXT: s_setpc_b64 s[30:31] 1379 %vec0 = call <4 x half> asm "; def $0", "=v"() 1380 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 0, i32 0> 1381 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1382 ret void 1383} 1384 1385define void @v_shuffle_v3f16_v4f16__4_0_0(ptr addrspace(1) inreg %ptr) { 1386; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_0_0: 1387; GFX900: ; %bb.0: 1388; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1389; GFX900-NEXT: v_mov_b32_e32 v2, 0 1390; GFX900-NEXT: ;;#ASMSTART 1391; GFX900-NEXT: ; def v[0:1] 1392; GFX900-NEXT: ;;#ASMEND 1393; GFX900-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1394; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 1395; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1396; GFX900-NEXT: s_waitcnt vmcnt(0) 1397; GFX900-NEXT: s_setpc_b64 s[30:31] 1398; 1399; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_0_0: 1400; GFX90A: ; %bb.0: 1401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1402; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1403; GFX90A-NEXT: ;;#ASMSTART 1404; GFX90A-NEXT: ; def v[0:1] 1405; GFX90A-NEXT: ;;#ASMEND 1406; GFX90A-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1407; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 1408; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1409; GFX90A-NEXT: s_waitcnt vmcnt(0) 1410; GFX90A-NEXT: s_setpc_b64 s[30:31] 1411; 1412; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_0_0: 1413; GFX940: ; %bb.0: 1414; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1415; GFX940-NEXT: v_mov_b32_e32 v2, 0 1416; GFX940-NEXT: ;;#ASMSTART 1417; GFX940-NEXT: ; def v[0:1] 1418; GFX940-NEXT: ;;#ASMEND 1419; GFX940-NEXT: s_nop 0 1420; GFX940-NEXT: v_lshlrev_b32_e32 v1, 16, v0 1421; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 1422; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1423; GFX940-NEXT: s_waitcnt vmcnt(0) 1424; GFX940-NEXT: s_setpc_b64 s[30:31] 1425 %vec0 = call <4 x half> asm "; def $0", "=v"() 1426 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 0, i32 0> 1427 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1428 ret void 1429} 1430 1431define void @v_shuffle_v3f16_v4f16__5_0_0(ptr addrspace(1) inreg %ptr) { 1432; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_0_0: 1433; GFX900: ; %bb.0: 1434; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1435; GFX900-NEXT: ;;#ASMSTART 1436; GFX900-NEXT: ; def v[0:1] 1437; GFX900-NEXT: ;;#ASMEND 1438; GFX900-NEXT: v_mov_b32_e32 v3, 0 1439; GFX900-NEXT: ;;#ASMSTART 1440; GFX900-NEXT: ; def v[1:2] 1441; GFX900-NEXT: ;;#ASMEND 1442; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 1443; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1444; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1445; GFX900-NEXT: s_waitcnt vmcnt(0) 1446; GFX900-NEXT: s_setpc_b64 s[30:31] 1447; 1448; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_0_0: 1449; GFX90A: ; %bb.0: 1450; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1451; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1452; GFX90A-NEXT: ;;#ASMSTART 1453; GFX90A-NEXT: ; def v[0:1] 1454; GFX90A-NEXT: ;;#ASMEND 1455; GFX90A-NEXT: ;;#ASMSTART 1456; GFX90A-NEXT: ; def v[2:3] 1457; GFX90A-NEXT: ;;#ASMEND 1458; GFX90A-NEXT: v_alignbit_b32 v1, v0, v2, 16 1459; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1460; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1461; GFX90A-NEXT: s_waitcnt vmcnt(0) 1462; GFX90A-NEXT: s_setpc_b64 s[30:31] 1463; 1464; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_0_0: 1465; GFX940: ; %bb.0: 1466; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1467; GFX940-NEXT: v_mov_b32_e32 v4, 0 1468; GFX940-NEXT: ;;#ASMSTART 1469; GFX940-NEXT: ; def v[0:1] 1470; GFX940-NEXT: ;;#ASMEND 1471; GFX940-NEXT: ;;#ASMSTART 1472; GFX940-NEXT: ; def v[2:3] 1473; GFX940-NEXT: ;;#ASMEND 1474; GFX940-NEXT: s_nop 0 1475; GFX940-NEXT: v_alignbit_b32 v1, v0, v2, 16 1476; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1477; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1478; GFX940-NEXT: s_waitcnt vmcnt(0) 1479; GFX940-NEXT: s_setpc_b64 s[30:31] 1480 %vec0 = call <4 x half> asm "; def $0", "=v"() 1481 %vec1 = call <4 x half> asm "; def $0", "=v"() 1482 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 1483 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1484 ret void 1485} 1486 1487define void @v_shuffle_v3f16_v4f16__6_0_0(ptr addrspace(1) inreg %ptr) { 1488; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_0_0: 1489; GFX900: ; %bb.0: 1490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX900-NEXT: ;;#ASMSTART 1492; GFX900-NEXT: ; def v[0:1] 1493; GFX900-NEXT: ;;#ASMEND 1494; GFX900-NEXT: v_mov_b32_e32 v3, 0 1495; GFX900-NEXT: ;;#ASMSTART 1496; GFX900-NEXT: ; def v[1:2] 1497; GFX900-NEXT: ;;#ASMEND 1498; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1499; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1500; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1501; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1502; GFX900-NEXT: s_waitcnt vmcnt(0) 1503; GFX900-NEXT: s_setpc_b64 s[30:31] 1504; 1505; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_0_0: 1506; GFX90A: ; %bb.0: 1507; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1508; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1509; GFX90A-NEXT: ;;#ASMSTART 1510; GFX90A-NEXT: ; def v[0:1] 1511; GFX90A-NEXT: ;;#ASMEND 1512; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1513; GFX90A-NEXT: ;;#ASMSTART 1514; GFX90A-NEXT: ; def v[2:3] 1515; GFX90A-NEXT: ;;#ASMEND 1516; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1517; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1518; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1519; GFX90A-NEXT: s_waitcnt vmcnt(0) 1520; GFX90A-NEXT: s_setpc_b64 s[30:31] 1521; 1522; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_0_0: 1523; GFX940: ; %bb.0: 1524; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1525; GFX940-NEXT: v_mov_b32_e32 v4, 0 1526; GFX940-NEXT: ;;#ASMSTART 1527; GFX940-NEXT: ; def v[0:1] 1528; GFX940-NEXT: ;;#ASMEND 1529; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1530; GFX940-NEXT: ;;#ASMSTART 1531; GFX940-NEXT: ; def v[2:3] 1532; GFX940-NEXT: ;;#ASMEND 1533; GFX940-NEXT: s_nop 0 1534; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1535; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1536; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1537; GFX940-NEXT: s_waitcnt vmcnt(0) 1538; GFX940-NEXT: s_setpc_b64 s[30:31] 1539 %vec0 = call <4 x half> asm "; def $0", "=v"() 1540 %vec1 = call <4 x half> asm "; def $0", "=v"() 1541 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 1542 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1543 ret void 1544} 1545 1546define void @v_shuffle_v3f16_v4f16__7_0_0(ptr addrspace(1) inreg %ptr) { 1547; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_0: 1548; GFX900: ; %bb.0: 1549; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1550; GFX900-NEXT: ;;#ASMSTART 1551; GFX900-NEXT: ; def v[0:1] 1552; GFX900-NEXT: ;;#ASMEND 1553; GFX900-NEXT: v_mov_b32_e32 v3, 0 1554; GFX900-NEXT: ;;#ASMSTART 1555; GFX900-NEXT: ; def v[1:2] 1556; GFX900-NEXT: ;;#ASMEND 1557; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16 1558; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1559; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1560; GFX900-NEXT: s_waitcnt vmcnt(0) 1561; GFX900-NEXT: s_setpc_b64 s[30:31] 1562; 1563; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_0: 1564; GFX90A: ; %bb.0: 1565; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1566; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1567; GFX90A-NEXT: ;;#ASMSTART 1568; GFX90A-NEXT: ; def v[0:1] 1569; GFX90A-NEXT: ;;#ASMEND 1570; GFX90A-NEXT: ;;#ASMSTART 1571; GFX90A-NEXT: ; def v[2:3] 1572; GFX90A-NEXT: ;;#ASMEND 1573; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16 1574; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1575; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1576; GFX90A-NEXT: s_waitcnt vmcnt(0) 1577; GFX90A-NEXT: s_setpc_b64 s[30:31] 1578; 1579; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_0: 1580; GFX940: ; %bb.0: 1581; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1582; GFX940-NEXT: v_mov_b32_e32 v4, 0 1583; GFX940-NEXT: ;;#ASMSTART 1584; GFX940-NEXT: ; def v[0:1] 1585; GFX940-NEXT: ;;#ASMEND 1586; GFX940-NEXT: ;;#ASMSTART 1587; GFX940-NEXT: ; def v[2:3] 1588; GFX940-NEXT: ;;#ASMEND 1589; GFX940-NEXT: s_nop 0 1590; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 1591; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1592; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1593; GFX940-NEXT: s_waitcnt vmcnt(0) 1594; GFX940-NEXT: s_setpc_b64 s[30:31] 1595 %vec0 = call <4 x half> asm "; def $0", "=v"() 1596 %vec1 = call <4 x half> asm "; def $0", "=v"() 1597 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 1598 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1599 ret void 1600} 1601 1602define void @v_shuffle_v3f16_v4f16__7_u_0(ptr addrspace(1) inreg %ptr) { 1603; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_0: 1604; GFX900: ; %bb.0: 1605; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1606; GFX900-NEXT: ;;#ASMSTART 1607; GFX900-NEXT: ; def v[0:1] 1608; GFX900-NEXT: ;;#ASMEND 1609; GFX900-NEXT: v_mov_b32_e32 v3, 0 1610; GFX900-NEXT: ;;#ASMSTART 1611; GFX900-NEXT: ; def v[1:2] 1612; GFX900-NEXT: ;;#ASMEND 1613; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16 1614; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1615; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1616; GFX900-NEXT: s_waitcnt vmcnt(0) 1617; GFX900-NEXT: s_setpc_b64 s[30:31] 1618; 1619; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_0: 1620; GFX90A: ; %bb.0: 1621; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1622; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1623; GFX90A-NEXT: ;;#ASMSTART 1624; GFX90A-NEXT: ; def v[0:1] 1625; GFX90A-NEXT: ;;#ASMEND 1626; GFX90A-NEXT: ;;#ASMSTART 1627; GFX90A-NEXT: ; def v[2:3] 1628; GFX90A-NEXT: ;;#ASMEND 1629; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16 1630; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1631; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1632; GFX90A-NEXT: s_waitcnt vmcnt(0) 1633; GFX90A-NEXT: s_setpc_b64 s[30:31] 1634; 1635; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_0: 1636; GFX940: ; %bb.0: 1637; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1638; GFX940-NEXT: v_mov_b32_e32 v4, 0 1639; GFX940-NEXT: ;;#ASMSTART 1640; GFX940-NEXT: ; def v[0:1] 1641; GFX940-NEXT: ;;#ASMEND 1642; GFX940-NEXT: ;;#ASMSTART 1643; GFX940-NEXT: ; def v[2:3] 1644; GFX940-NEXT: ;;#ASMEND 1645; GFX940-NEXT: s_nop 0 1646; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 1647; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1648; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1649; GFX940-NEXT: s_waitcnt vmcnt(0) 1650; GFX940-NEXT: s_setpc_b64 s[30:31] 1651 %vec0 = call <4 x half> asm "; def $0", "=v"() 1652 %vec1 = call <4 x half> asm "; def $0", "=v"() 1653 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 1654 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1655 ret void 1656} 1657 1658define void @v_shuffle_v3f16_v4f16__7_1_0(ptr addrspace(1) inreg %ptr) { 1659; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_0: 1660; GFX900: ; %bb.0: 1661; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1662; GFX900-NEXT: ;;#ASMSTART 1663; GFX900-NEXT: ; def v[0:1] 1664; GFX900-NEXT: ;;#ASMEND 1665; GFX900-NEXT: v_mov_b32_e32 v3, 0 1666; GFX900-NEXT: ;;#ASMSTART 1667; GFX900-NEXT: ; def v[1:2] 1668; GFX900-NEXT: ;;#ASMEND 1669; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1670; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 1671; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1672; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1673; GFX900-NEXT: s_waitcnt vmcnt(0) 1674; GFX900-NEXT: s_setpc_b64 s[30:31] 1675; 1676; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_0: 1677; GFX90A: ; %bb.0: 1678; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1679; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1680; GFX90A-NEXT: ;;#ASMSTART 1681; GFX90A-NEXT: ; def v[0:1] 1682; GFX90A-NEXT: ;;#ASMEND 1683; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1684; GFX90A-NEXT: ;;#ASMSTART 1685; GFX90A-NEXT: ; def v[2:3] 1686; GFX90A-NEXT: ;;#ASMEND 1687; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 1688; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1689; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1690; GFX90A-NEXT: s_waitcnt vmcnt(0) 1691; GFX90A-NEXT: s_setpc_b64 s[30:31] 1692; 1693; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_0: 1694; GFX940: ; %bb.0: 1695; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1696; GFX940-NEXT: v_mov_b32_e32 v4, 0 1697; GFX940-NEXT: ;;#ASMSTART 1698; GFX940-NEXT: ; def v[0:1] 1699; GFX940-NEXT: ;;#ASMEND 1700; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1701; GFX940-NEXT: ;;#ASMSTART 1702; GFX940-NEXT: ; def v[2:3] 1703; GFX940-NEXT: ;;#ASMEND 1704; GFX940-NEXT: s_nop 0 1705; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 1706; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1707; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1708; GFX940-NEXT: s_waitcnt vmcnt(0) 1709; GFX940-NEXT: s_setpc_b64 s[30:31] 1710 %vec0 = call <4 x half> asm "; def $0", "=v"() 1711 %vec1 = call <4 x half> asm "; def $0", "=v"() 1712 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 1713 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1714 ret void 1715} 1716 1717define void @v_shuffle_v3f16_v4f16__7_2_0(ptr addrspace(1) inreg %ptr) { 1718; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_0: 1719; GFX900: ; %bb.0: 1720; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1721; GFX900-NEXT: v_mov_b32_e32 v4, 0 1722; GFX900-NEXT: ;;#ASMSTART 1723; GFX900-NEXT: ; def v[0:1] 1724; GFX900-NEXT: ;;#ASMEND 1725; GFX900-NEXT: ;;#ASMSTART 1726; GFX900-NEXT: ; def v[2:3] 1727; GFX900-NEXT: ;;#ASMEND 1728; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 1729; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4 1730; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 1731; GFX900-NEXT: s_waitcnt vmcnt(0) 1732; GFX900-NEXT: s_setpc_b64 s[30:31] 1733; 1734; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_0: 1735; GFX90A: ; %bb.0: 1736; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1737; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1738; GFX90A-NEXT: ;;#ASMSTART 1739; GFX90A-NEXT: ; def v[0:1] 1740; GFX90A-NEXT: ;;#ASMEND 1741; GFX90A-NEXT: ;;#ASMSTART 1742; GFX90A-NEXT: ; def v[2:3] 1743; GFX90A-NEXT: ;;#ASMEND 1744; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16 1745; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1746; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1747; GFX90A-NEXT: s_waitcnt vmcnt(0) 1748; GFX90A-NEXT: s_setpc_b64 s[30:31] 1749; 1750; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_0: 1751; GFX940: ; %bb.0: 1752; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1753; GFX940-NEXT: v_mov_b32_e32 v4, 0 1754; GFX940-NEXT: ;;#ASMSTART 1755; GFX940-NEXT: ; def v[0:1] 1756; GFX940-NEXT: ;;#ASMEND 1757; GFX940-NEXT: ;;#ASMSTART 1758; GFX940-NEXT: ; def v[2:3] 1759; GFX940-NEXT: ;;#ASMEND 1760; GFX940-NEXT: s_nop 0 1761; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 1762; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1763; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1764; GFX940-NEXT: s_waitcnt vmcnt(0) 1765; GFX940-NEXT: s_setpc_b64 s[30:31] 1766 %vec0 = call <4 x half> asm "; def $0", "=v"() 1767 %vec1 = call <4 x half> asm "; def $0", "=v"() 1768 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 1769 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1770 ret void 1771} 1772 1773define void @v_shuffle_v3f16_v4f16__7_3_0(ptr addrspace(1) inreg %ptr) { 1774; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_0: 1775; GFX900: ; %bb.0: 1776; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1777; GFX900-NEXT: v_mov_b32_e32 v4, 0 1778; GFX900-NEXT: ;;#ASMSTART 1779; GFX900-NEXT: ; def v[0:1] 1780; GFX900-NEXT: ;;#ASMEND 1781; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1782; GFX900-NEXT: ;;#ASMSTART 1783; GFX900-NEXT: ; def v[2:3] 1784; GFX900-NEXT: ;;#ASMEND 1785; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 1786; GFX900-NEXT: global_store_short v4, v0, s[16:17] offset:4 1787; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 1788; GFX900-NEXT: s_waitcnt vmcnt(0) 1789; GFX900-NEXT: s_setpc_b64 s[30:31] 1790; 1791; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_0: 1792; GFX90A: ; %bb.0: 1793; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1794; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1795; GFX90A-NEXT: ;;#ASMSTART 1796; GFX90A-NEXT: ; def v[0:1] 1797; GFX90A-NEXT: ;;#ASMEND 1798; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1799; GFX90A-NEXT: ;;#ASMSTART 1800; GFX90A-NEXT: ; def v[2:3] 1801; GFX90A-NEXT: ;;#ASMEND 1802; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 1803; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1804; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1805; GFX90A-NEXT: s_waitcnt vmcnt(0) 1806; GFX90A-NEXT: s_setpc_b64 s[30:31] 1807; 1808; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_0: 1809; GFX940: ; %bb.0: 1810; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1811; GFX940-NEXT: v_mov_b32_e32 v4, 0 1812; GFX940-NEXT: ;;#ASMSTART 1813; GFX940-NEXT: ; def v[0:1] 1814; GFX940-NEXT: ;;#ASMEND 1815; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1816; GFX940-NEXT: ;;#ASMSTART 1817; GFX940-NEXT: ; def v[2:3] 1818; GFX940-NEXT: ;;#ASMEND 1819; GFX940-NEXT: s_nop 0 1820; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 1821; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1822; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1823; GFX940-NEXT: s_waitcnt vmcnt(0) 1824; GFX940-NEXT: s_setpc_b64 s[30:31] 1825 %vec0 = call <4 x half> asm "; def $0", "=v"() 1826 %vec1 = call <4 x half> asm "; def $0", "=v"() 1827 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 1828 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1829 ret void 1830} 1831 1832define void @v_shuffle_v3f16_v4f16__7_4_0(ptr addrspace(1) inreg %ptr) { 1833; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_0: 1834; GFX900: ; %bb.0: 1835; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1836; GFX900-NEXT: ;;#ASMSTART 1837; GFX900-NEXT: ; def v[0:1] 1838; GFX900-NEXT: ;;#ASMEND 1839; GFX900-NEXT: v_mov_b32_e32 v3, 0 1840; GFX900-NEXT: ;;#ASMSTART 1841; GFX900-NEXT: ; def v[1:2] 1842; GFX900-NEXT: ;;#ASMEND 1843; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 1844; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1845; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1846; GFX900-NEXT: s_waitcnt vmcnt(0) 1847; GFX900-NEXT: s_setpc_b64 s[30:31] 1848; 1849; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_0: 1850; GFX90A: ; %bb.0: 1851; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1852; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1853; GFX90A-NEXT: ;;#ASMSTART 1854; GFX90A-NEXT: ; def v[0:1] 1855; GFX90A-NEXT: ;;#ASMEND 1856; GFX90A-NEXT: ;;#ASMSTART 1857; GFX90A-NEXT: ; def v[2:3] 1858; GFX90A-NEXT: ;;#ASMEND 1859; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16 1860; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1861; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1862; GFX90A-NEXT: s_waitcnt vmcnt(0) 1863; GFX90A-NEXT: s_setpc_b64 s[30:31] 1864; 1865; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_0: 1866; GFX940: ; %bb.0: 1867; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1868; GFX940-NEXT: v_mov_b32_e32 v4, 0 1869; GFX940-NEXT: ;;#ASMSTART 1870; GFX940-NEXT: ; def v[0:1] 1871; GFX940-NEXT: ;;#ASMEND 1872; GFX940-NEXT: ;;#ASMSTART 1873; GFX940-NEXT: ; def v[2:3] 1874; GFX940-NEXT: ;;#ASMEND 1875; GFX940-NEXT: s_nop 0 1876; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 1877; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1878; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1879; GFX940-NEXT: s_waitcnt vmcnt(0) 1880; GFX940-NEXT: s_setpc_b64 s[30:31] 1881 %vec0 = call <4 x half> asm "; def $0", "=v"() 1882 %vec1 = call <4 x half> asm "; def $0", "=v"() 1883 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 0> 1884 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1885 ret void 1886} 1887 1888define void @v_shuffle_v3f16_v4f16__7_5_0(ptr addrspace(1) inreg %ptr) { 1889; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_0: 1890; GFX900: ; %bb.0: 1891; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1892; GFX900-NEXT: ;;#ASMSTART 1893; GFX900-NEXT: ; def v[0:1] 1894; GFX900-NEXT: ;;#ASMEND 1895; GFX900-NEXT: v_mov_b32_e32 v3, 0 1896; GFX900-NEXT: ;;#ASMSTART 1897; GFX900-NEXT: ; def v[1:2] 1898; GFX900-NEXT: ;;#ASMEND 1899; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1900; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 1901; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1902; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1903; GFX900-NEXT: s_waitcnt vmcnt(0) 1904; GFX900-NEXT: s_setpc_b64 s[30:31] 1905; 1906; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_0: 1907; GFX90A: ; %bb.0: 1908; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1909; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1910; GFX90A-NEXT: ;;#ASMSTART 1911; GFX90A-NEXT: ; def v[0:1] 1912; GFX90A-NEXT: ;;#ASMEND 1913; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1914; GFX90A-NEXT: ;;#ASMSTART 1915; GFX90A-NEXT: ; def v[2:3] 1916; GFX90A-NEXT: ;;#ASMEND 1917; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 1918; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1919; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1920; GFX90A-NEXT: s_waitcnt vmcnt(0) 1921; GFX90A-NEXT: s_setpc_b64 s[30:31] 1922; 1923; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_0: 1924; GFX940: ; %bb.0: 1925; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1926; GFX940-NEXT: v_mov_b32_e32 v4, 0 1927; GFX940-NEXT: ;;#ASMSTART 1928; GFX940-NEXT: ; def v[0:1] 1929; GFX940-NEXT: ;;#ASMEND 1930; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1931; GFX940-NEXT: ;;#ASMSTART 1932; GFX940-NEXT: ; def v[2:3] 1933; GFX940-NEXT: ;;#ASMEND 1934; GFX940-NEXT: s_nop 0 1935; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 1936; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1937; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1938; GFX940-NEXT: s_waitcnt vmcnt(0) 1939; GFX940-NEXT: s_setpc_b64 s[30:31] 1940 %vec0 = call <4 x half> asm "; def $0", "=v"() 1941 %vec1 = call <4 x half> asm "; def $0", "=v"() 1942 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 1943 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 1944 ret void 1945} 1946 1947define void @v_shuffle_v3f16_v4f16__7_6_0(ptr addrspace(1) inreg %ptr) { 1948; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_0: 1949; GFX900: ; %bb.0: 1950; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1951; GFX900-NEXT: ;;#ASMSTART 1952; GFX900-NEXT: ; def v[0:1] 1953; GFX900-NEXT: ;;#ASMEND 1954; GFX900-NEXT: v_mov_b32_e32 v3, 0 1955; GFX900-NEXT: ;;#ASMSTART 1956; GFX900-NEXT: ; def v[1:2] 1957; GFX900-NEXT: ;;#ASMEND 1958; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16 1959; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 1960; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 1961; GFX900-NEXT: s_waitcnt vmcnt(0) 1962; GFX900-NEXT: s_setpc_b64 s[30:31] 1963; 1964; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_0: 1965; GFX90A: ; %bb.0: 1966; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1967; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1968; GFX90A-NEXT: ;;#ASMSTART 1969; GFX90A-NEXT: ; def v[0:1] 1970; GFX90A-NEXT: ;;#ASMEND 1971; GFX90A-NEXT: ;;#ASMSTART 1972; GFX90A-NEXT: ; def v[2:3] 1973; GFX90A-NEXT: ;;#ASMEND 1974; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16 1975; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 1976; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 1977; GFX90A-NEXT: s_waitcnt vmcnt(0) 1978; GFX90A-NEXT: s_setpc_b64 s[30:31] 1979; 1980; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_0: 1981; GFX940: ; %bb.0: 1982; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1983; GFX940-NEXT: v_mov_b32_e32 v4, 0 1984; GFX940-NEXT: ;;#ASMSTART 1985; GFX940-NEXT: ; def v[0:1] 1986; GFX940-NEXT: ;;#ASMEND 1987; GFX940-NEXT: ;;#ASMSTART 1988; GFX940-NEXT: ; def v[2:3] 1989; GFX940-NEXT: ;;#ASMEND 1990; GFX940-NEXT: s_nop 0 1991; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 1992; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 1993; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 1994; GFX940-NEXT: s_waitcnt vmcnt(0) 1995; GFX940-NEXT: s_setpc_b64 s[30:31] 1996 %vec0 = call <4 x half> asm "; def $0", "=v"() 1997 %vec1 = call <4 x half> asm "; def $0", "=v"() 1998 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 1999 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2000 ret void 2001} 2002 2003define void @v_shuffle_v3f16_v4f16__u_1_1(ptr addrspace(1) inreg %ptr) { 2004; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_1_1: 2005; GFX900: ; %bb.0: 2006; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2007; GFX900-NEXT: v_mov_b32_e32 v2, 0 2008; GFX900-NEXT: ;;#ASMSTART 2009; GFX900-NEXT: ; def v[0:1] 2010; GFX900-NEXT: ;;#ASMEND 2011; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2012; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2013; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2014; GFX900-NEXT: s_waitcnt vmcnt(0) 2015; GFX900-NEXT: s_setpc_b64 s[30:31] 2016; 2017; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_1_1: 2018; GFX90A: ; %bb.0: 2019; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2020; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2021; GFX90A-NEXT: ;;#ASMSTART 2022; GFX90A-NEXT: ; def v[0:1] 2023; GFX90A-NEXT: ;;#ASMEND 2024; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2025; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2026; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2027; GFX90A-NEXT: s_waitcnt vmcnt(0) 2028; GFX90A-NEXT: s_setpc_b64 s[30:31] 2029; 2030; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_1_1: 2031; GFX940: ; %bb.0: 2032; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2033; GFX940-NEXT: v_mov_b32_e32 v2, 0 2034; GFX940-NEXT: ;;#ASMSTART 2035; GFX940-NEXT: ; def v[0:1] 2036; GFX940-NEXT: ;;#ASMEND 2037; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2038; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2039; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2040; GFX940-NEXT: s_waitcnt vmcnt(0) 2041; GFX940-NEXT: s_setpc_b64 s[30:31] 2042 %vec0 = call <4 x half> asm "; def $0", "=v"() 2043 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 1, i32 1> 2044 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2045 ret void 2046} 2047 2048define void @v_shuffle_v3f16_v4f16__0_1_1(ptr addrspace(1) inreg %ptr) { 2049; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_1_1: 2050; GFX900: ; %bb.0: 2051; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2052; GFX900-NEXT: v_mov_b32_e32 v2, 0 2053; GFX900-NEXT: ;;#ASMSTART 2054; GFX900-NEXT: ; def v[0:1] 2055; GFX900-NEXT: ;;#ASMEND 2056; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2057; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2058; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2059; GFX900-NEXT: s_waitcnt vmcnt(0) 2060; GFX900-NEXT: s_setpc_b64 s[30:31] 2061; 2062; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_1_1: 2063; GFX90A: ; %bb.0: 2064; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2065; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2066; GFX90A-NEXT: ;;#ASMSTART 2067; GFX90A-NEXT: ; def v[0:1] 2068; GFX90A-NEXT: ;;#ASMEND 2069; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2070; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2071; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2072; GFX90A-NEXT: s_waitcnt vmcnt(0) 2073; GFX90A-NEXT: s_setpc_b64 s[30:31] 2074; 2075; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_1_1: 2076; GFX940: ; %bb.0: 2077; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2078; GFX940-NEXT: v_mov_b32_e32 v2, 0 2079; GFX940-NEXT: ;;#ASMSTART 2080; GFX940-NEXT: ; def v[0:1] 2081; GFX940-NEXT: ;;#ASMEND 2082; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2083; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2084; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2085; GFX940-NEXT: s_waitcnt vmcnt(0) 2086; GFX940-NEXT: s_setpc_b64 s[30:31] 2087 %vec0 = call <4 x half> asm "; def $0", "=v"() 2088 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 1> 2089 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2090 ret void 2091} 2092 2093define void @v_shuffle_v3f16_v4f16__1_1_1(ptr addrspace(1) inreg %ptr) { 2094; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_1_1: 2095; GFX900: ; %bb.0: 2096; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2097; GFX900-NEXT: ;;#ASMSTART 2098; GFX900-NEXT: ; def v[0:1] 2099; GFX900-NEXT: ;;#ASMEND 2100; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2101; GFX900-NEXT: v_mov_b32_e32 v2, 0 2102; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 2103; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2104; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2105; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2106; GFX900-NEXT: s_waitcnt vmcnt(0) 2107; GFX900-NEXT: s_setpc_b64 s[30:31] 2108; 2109; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_1_1: 2110; GFX90A: ; %bb.0: 2111; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2112; GFX90A-NEXT: ;;#ASMSTART 2113; GFX90A-NEXT: ; def v[0:1] 2114; GFX90A-NEXT: ;;#ASMEND 2115; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2116; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2117; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 2118; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2119; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2120; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2121; GFX90A-NEXT: s_waitcnt vmcnt(0) 2122; GFX90A-NEXT: s_setpc_b64 s[30:31] 2123; 2124; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_1_1: 2125; GFX940: ; %bb.0: 2126; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2127; GFX940-NEXT: ;;#ASMSTART 2128; GFX940-NEXT: ; def v[0:1] 2129; GFX940-NEXT: ;;#ASMEND 2130; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2131; GFX940-NEXT: v_mov_b32_e32 v2, 0 2132; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 2133; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2134; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2135; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2136; GFX940-NEXT: s_waitcnt vmcnt(0) 2137; GFX940-NEXT: s_setpc_b64 s[30:31] 2138 %vec0 = call <4 x half> asm "; def $0", "=v"() 2139 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 1, i32 1> 2140 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2141 ret void 2142} 2143 2144define void @v_shuffle_v3f16_v4f16__2_1_1(ptr addrspace(1) inreg %ptr) { 2145; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_1_1: 2146; GFX900: ; %bb.0: 2147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2148; GFX900-NEXT: ;;#ASMSTART 2149; GFX900-NEXT: ; def v[0:1] 2150; GFX900-NEXT: ;;#ASMEND 2151; GFX900-NEXT: s_mov_b32 s4, 0xffff 2152; GFX900-NEXT: v_mov_b32_e32 v2, 0 2153; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 2154; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2155; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2156; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2157; GFX900-NEXT: s_waitcnt vmcnt(0) 2158; GFX900-NEXT: s_setpc_b64 s[30:31] 2159; 2160; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_1_1: 2161; GFX90A: ; %bb.0: 2162; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2163; GFX90A-NEXT: ;;#ASMSTART 2164; GFX90A-NEXT: ; def v[0:1] 2165; GFX90A-NEXT: ;;#ASMEND 2166; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2167; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2168; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 2169; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2170; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2171; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2172; GFX90A-NEXT: s_waitcnt vmcnt(0) 2173; GFX90A-NEXT: s_setpc_b64 s[30:31] 2174; 2175; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_1_1: 2176; GFX940: ; %bb.0: 2177; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2178; GFX940-NEXT: ;;#ASMSTART 2179; GFX940-NEXT: ; def v[0:1] 2180; GFX940-NEXT: ;;#ASMEND 2181; GFX940-NEXT: s_mov_b32 s2, 0xffff 2182; GFX940-NEXT: v_mov_b32_e32 v2, 0 2183; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 2184; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2185; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2186; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2187; GFX940-NEXT: s_waitcnt vmcnt(0) 2188; GFX940-NEXT: s_setpc_b64 s[30:31] 2189 %vec0 = call <4 x half> asm "; def $0", "=v"() 2190 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 1, i32 1> 2191 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2192 ret void 2193} 2194 2195define void @v_shuffle_v3f16_v4f16__3_1_1(ptr addrspace(1) inreg %ptr) { 2196; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_1_1: 2197; GFX900: ; %bb.0: 2198; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2199; GFX900-NEXT: ;;#ASMSTART 2200; GFX900-NEXT: ; def v[0:1] 2201; GFX900-NEXT: ;;#ASMEND 2202; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2203; GFX900-NEXT: v_mov_b32_e32 v2, 0 2204; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2205; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2206; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2207; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2208; GFX900-NEXT: s_waitcnt vmcnt(0) 2209; GFX900-NEXT: s_setpc_b64 s[30:31] 2210; 2211; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_1_1: 2212; GFX90A: ; %bb.0: 2213; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2214; GFX90A-NEXT: ;;#ASMSTART 2215; GFX90A-NEXT: ; def v[0:1] 2216; GFX90A-NEXT: ;;#ASMEND 2217; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2218; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2219; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 2220; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2221; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2222; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2223; GFX90A-NEXT: s_waitcnt vmcnt(0) 2224; GFX90A-NEXT: s_setpc_b64 s[30:31] 2225; 2226; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_1_1: 2227; GFX940: ; %bb.0: 2228; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX940-NEXT: ;;#ASMSTART 2230; GFX940-NEXT: ; def v[0:1] 2231; GFX940-NEXT: ;;#ASMEND 2232; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2233; GFX940-NEXT: v_mov_b32_e32 v2, 0 2234; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 2235; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2236; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2237; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2238; GFX940-NEXT: s_waitcnt vmcnt(0) 2239; GFX940-NEXT: s_setpc_b64 s[30:31] 2240 %vec0 = call <4 x half> asm "; def $0", "=v"() 2241 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 1, i32 1> 2242 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2243 ret void 2244} 2245 2246define void @v_shuffle_v3f16_v4f16__4_1_1(ptr addrspace(1) inreg %ptr) { 2247; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_1_1: 2248; GFX900: ; %bb.0: 2249; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2250; GFX900-NEXT: v_mov_b32_e32 v2, 0 2251; GFX900-NEXT: ;;#ASMSTART 2252; GFX900-NEXT: ; def v[0:1] 2253; GFX900-NEXT: ;;#ASMEND 2254; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2255; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2256; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 2257; GFX900-NEXT: s_waitcnt vmcnt(0) 2258; GFX900-NEXT: s_setpc_b64 s[30:31] 2259; 2260; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_1_1: 2261; GFX90A: ; %bb.0: 2262; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2263; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2264; GFX90A-NEXT: ;;#ASMSTART 2265; GFX90A-NEXT: ; def v[0:1] 2266; GFX90A-NEXT: ;;#ASMEND 2267; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2268; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2269; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 2270; GFX90A-NEXT: s_waitcnt vmcnt(0) 2271; GFX90A-NEXT: s_setpc_b64 s[30:31] 2272; 2273; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_1_1: 2274; GFX940: ; %bb.0: 2275; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2276; GFX940-NEXT: v_mov_b32_e32 v2, 0 2277; GFX940-NEXT: ;;#ASMSTART 2278; GFX940-NEXT: ; def v[0:1] 2279; GFX940-NEXT: ;;#ASMEND 2280; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2281; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2282; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 2283; GFX940-NEXT: s_waitcnt vmcnt(0) 2284; GFX940-NEXT: s_setpc_b64 s[30:31] 2285 %vec0 = call <4 x half> asm "; def $0", "=v"() 2286 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 1, i32 1> 2287 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2288 ret void 2289} 2290 2291define void @v_shuffle_v3f16_v4f16__5_1_1(ptr addrspace(1) inreg %ptr) { 2292; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_1_1: 2293; GFX900: ; %bb.0: 2294; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2295; GFX900-NEXT: ;;#ASMSTART 2296; GFX900-NEXT: ; def v[0:1] 2297; GFX900-NEXT: ;;#ASMEND 2298; GFX900-NEXT: ;;#ASMSTART 2299; GFX900-NEXT: ; def v[1:2] 2300; GFX900-NEXT: ;;#ASMEND 2301; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2302; GFX900-NEXT: v_mov_b32_e32 v3, 0 2303; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 2304; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2305; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2306; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2307; GFX900-NEXT: s_waitcnt vmcnt(0) 2308; GFX900-NEXT: s_setpc_b64 s[30:31] 2309; 2310; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_1_1: 2311; GFX90A: ; %bb.0: 2312; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2313; GFX90A-NEXT: ;;#ASMSTART 2314; GFX90A-NEXT: ; def v[0:1] 2315; GFX90A-NEXT: ;;#ASMEND 2316; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2317; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2318; GFX90A-NEXT: ;;#ASMSTART 2319; GFX90A-NEXT: ; def v[2:3] 2320; GFX90A-NEXT: ;;#ASMEND 2321; GFX90A-NEXT: v_perm_b32 v1, v0, v2, s4 2322; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2323; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2324; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2325; GFX90A-NEXT: s_waitcnt vmcnt(0) 2326; GFX90A-NEXT: s_setpc_b64 s[30:31] 2327; 2328; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_1_1: 2329; GFX940: ; %bb.0: 2330; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2331; GFX940-NEXT: ;;#ASMSTART 2332; GFX940-NEXT: ; def v[0:1] 2333; GFX940-NEXT: ;;#ASMEND 2334; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2335; GFX940-NEXT: v_mov_b32_e32 v4, 0 2336; GFX940-NEXT: ;;#ASMSTART 2337; GFX940-NEXT: ; def v[2:3] 2338; GFX940-NEXT: ;;#ASMEND 2339; GFX940-NEXT: s_nop 0 2340; GFX940-NEXT: v_perm_b32 v1, v0, v2, s2 2341; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2342; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2343; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2344; GFX940-NEXT: s_waitcnt vmcnt(0) 2345; GFX940-NEXT: s_setpc_b64 s[30:31] 2346 %vec0 = call <4 x half> asm "; def $0", "=v"() 2347 %vec1 = call <4 x half> asm "; def $0", "=v"() 2348 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 2349 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2350 ret void 2351} 2352 2353define void @v_shuffle_v3f16_v4f16__6_1_1(ptr addrspace(1) inreg %ptr) { 2354; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_1_1: 2355; GFX900: ; %bb.0: 2356; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2357; GFX900-NEXT: ;;#ASMSTART 2358; GFX900-NEXT: ; def v[0:1] 2359; GFX900-NEXT: ;;#ASMEND 2360; GFX900-NEXT: ;;#ASMSTART 2361; GFX900-NEXT: ; def v[1:2] 2362; GFX900-NEXT: ;;#ASMEND 2363; GFX900-NEXT: s_mov_b32 s4, 0xffff 2364; GFX900-NEXT: v_mov_b32_e32 v3, 0 2365; GFX900-NEXT: v_bfi_b32 v1, s4, v2, v0 2366; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2367; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2368; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2369; GFX900-NEXT: s_waitcnt vmcnt(0) 2370; GFX900-NEXT: s_setpc_b64 s[30:31] 2371; 2372; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_1_1: 2373; GFX90A: ; %bb.0: 2374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2375; GFX90A-NEXT: ;;#ASMSTART 2376; GFX90A-NEXT: ; def v[0:1] 2377; GFX90A-NEXT: ;;#ASMEND 2378; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2379; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2380; GFX90A-NEXT: ;;#ASMSTART 2381; GFX90A-NEXT: ; def v[2:3] 2382; GFX90A-NEXT: ;;#ASMEND 2383; GFX90A-NEXT: v_bfi_b32 v1, s4, v3, v0 2384; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2385; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2386; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2387; GFX90A-NEXT: s_waitcnt vmcnt(0) 2388; GFX90A-NEXT: s_setpc_b64 s[30:31] 2389; 2390; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_1_1: 2391; GFX940: ; %bb.0: 2392; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2393; GFX940-NEXT: ;;#ASMSTART 2394; GFX940-NEXT: ; def v[0:1] 2395; GFX940-NEXT: ;;#ASMEND 2396; GFX940-NEXT: s_mov_b32 s2, 0xffff 2397; GFX940-NEXT: v_mov_b32_e32 v4, 0 2398; GFX940-NEXT: ;;#ASMSTART 2399; GFX940-NEXT: ; def v[2:3] 2400; GFX940-NEXT: ;;#ASMEND 2401; GFX940-NEXT: s_nop 0 2402; GFX940-NEXT: v_bfi_b32 v1, s2, v3, v0 2403; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2404; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2405; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2406; GFX940-NEXT: s_waitcnt vmcnt(0) 2407; GFX940-NEXT: s_setpc_b64 s[30:31] 2408 %vec0 = call <4 x half> asm "; def $0", "=v"() 2409 %vec1 = call <4 x half> asm "; def $0", "=v"() 2410 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 2411 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2412 ret void 2413} 2414 2415define void @v_shuffle_v3f16_v4f16__7_1_1(ptr addrspace(1) inreg %ptr) { 2416; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_1: 2417; GFX900: ; %bb.0: 2418; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2419; GFX900-NEXT: ;;#ASMSTART 2420; GFX900-NEXT: ; def v[0:1] 2421; GFX900-NEXT: ;;#ASMEND 2422; GFX900-NEXT: ;;#ASMSTART 2423; GFX900-NEXT: ; def v[1:2] 2424; GFX900-NEXT: ;;#ASMEND 2425; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2426; GFX900-NEXT: v_mov_b32_e32 v3, 0 2427; GFX900-NEXT: v_perm_b32 v1, v0, v2, s4 2428; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2429; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2430; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 2431; GFX900-NEXT: s_waitcnt vmcnt(0) 2432; GFX900-NEXT: s_setpc_b64 s[30:31] 2433; 2434; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_1: 2435; GFX90A: ; %bb.0: 2436; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2437; GFX90A-NEXT: ;;#ASMSTART 2438; GFX90A-NEXT: ; def v[0:1] 2439; GFX90A-NEXT: ;;#ASMEND 2440; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2441; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2442; GFX90A-NEXT: ;;#ASMSTART 2443; GFX90A-NEXT: ; def v[2:3] 2444; GFX90A-NEXT: ;;#ASMEND 2445; GFX90A-NEXT: v_perm_b32 v1, v0, v3, s4 2446; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2447; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2448; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 2449; GFX90A-NEXT: s_waitcnt vmcnt(0) 2450; GFX90A-NEXT: s_setpc_b64 s[30:31] 2451; 2452; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_1: 2453; GFX940: ; %bb.0: 2454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2455; GFX940-NEXT: ;;#ASMSTART 2456; GFX940-NEXT: ; def v[0:1] 2457; GFX940-NEXT: ;;#ASMEND 2458; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2459; GFX940-NEXT: v_mov_b32_e32 v4, 0 2460; GFX940-NEXT: ;;#ASMSTART 2461; GFX940-NEXT: ; def v[2:3] 2462; GFX940-NEXT: ;;#ASMEND 2463; GFX940-NEXT: s_nop 0 2464; GFX940-NEXT: v_perm_b32 v1, v0, v3, s2 2465; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 2466; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2467; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 2468; GFX940-NEXT: s_waitcnt vmcnt(0) 2469; GFX940-NEXT: s_setpc_b64 s[30:31] 2470 %vec0 = call <4 x half> asm "; def $0", "=v"() 2471 %vec1 = call <4 x half> asm "; def $0", "=v"() 2472 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 1> 2473 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2474 ret void 2475} 2476 2477define void @v_shuffle_v3f16_v4f16__7_u_1(ptr addrspace(1) inreg %ptr) { 2478; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_1: 2479; GFX900: ; %bb.0: 2480; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2481; GFX900-NEXT: ;;#ASMSTART 2482; GFX900-NEXT: ; def v[0:1] 2483; GFX900-NEXT: ;;#ASMEND 2484; GFX900-NEXT: v_mov_b32_e32 v3, 0 2485; GFX900-NEXT: ;;#ASMSTART 2486; GFX900-NEXT: ; def v[1:2] 2487; GFX900-NEXT: ;;#ASMEND 2488; GFX900-NEXT: v_alignbit_b32 v1, s4, v2, 16 2489; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2490; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2491; GFX900-NEXT: s_waitcnt vmcnt(0) 2492; GFX900-NEXT: s_setpc_b64 s[30:31] 2493; 2494; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_1: 2495; GFX90A: ; %bb.0: 2496; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2497; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2498; GFX90A-NEXT: ;;#ASMSTART 2499; GFX90A-NEXT: ; def v[0:1] 2500; GFX90A-NEXT: ;;#ASMEND 2501; GFX90A-NEXT: ;;#ASMSTART 2502; GFX90A-NEXT: ; def v[2:3] 2503; GFX90A-NEXT: ;;#ASMEND 2504; GFX90A-NEXT: v_alignbit_b32 v1, s4, v3, 16 2505; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2506; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2507; GFX90A-NEXT: s_waitcnt vmcnt(0) 2508; GFX90A-NEXT: s_setpc_b64 s[30:31] 2509; 2510; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_1: 2511; GFX940: ; %bb.0: 2512; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2513; GFX940-NEXT: v_mov_b32_e32 v4, 0 2514; GFX940-NEXT: ;;#ASMSTART 2515; GFX940-NEXT: ; def v[0:1] 2516; GFX940-NEXT: ;;#ASMEND 2517; GFX940-NEXT: ;;#ASMSTART 2518; GFX940-NEXT: ; def v[2:3] 2519; GFX940-NEXT: ;;#ASMEND 2520; GFX940-NEXT: s_nop 0 2521; GFX940-NEXT: v_alignbit_b32 v1, s0, v3, 16 2522; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2523; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2524; GFX940-NEXT: s_waitcnt vmcnt(0) 2525; GFX940-NEXT: s_setpc_b64 s[30:31] 2526 %vec0 = call <4 x half> asm "; def $0", "=v"() 2527 %vec1 = call <4 x half> asm "; def $0", "=v"() 2528 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 2529 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2530 ret void 2531} 2532 2533define void @v_shuffle_v3f16_v4f16__7_0_1(ptr addrspace(1) inreg %ptr) { 2534; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_1: 2535; GFX900: ; %bb.0: 2536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2537; GFX900-NEXT: ;;#ASMSTART 2538; GFX900-NEXT: ; def v[0:1] 2539; GFX900-NEXT: ;;#ASMEND 2540; GFX900-NEXT: v_mov_b32_e32 v3, 0 2541; GFX900-NEXT: ;;#ASMSTART 2542; GFX900-NEXT: ; def v[1:2] 2543; GFX900-NEXT: ;;#ASMEND 2544; GFX900-NEXT: v_alignbit_b32 v1, v0, v2, 16 2545; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2546; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2547; GFX900-NEXT: s_waitcnt vmcnt(0) 2548; GFX900-NEXT: s_setpc_b64 s[30:31] 2549; 2550; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_1: 2551; GFX90A: ; %bb.0: 2552; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2553; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2554; GFX90A-NEXT: ;;#ASMSTART 2555; GFX90A-NEXT: ; def v[0:1] 2556; GFX90A-NEXT: ;;#ASMEND 2557; GFX90A-NEXT: ;;#ASMSTART 2558; GFX90A-NEXT: ; def v[2:3] 2559; GFX90A-NEXT: ;;#ASMEND 2560; GFX90A-NEXT: v_alignbit_b32 v1, v0, v3, 16 2561; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2562; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2563; GFX90A-NEXT: s_waitcnt vmcnt(0) 2564; GFX90A-NEXT: s_setpc_b64 s[30:31] 2565; 2566; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_1: 2567; GFX940: ; %bb.0: 2568; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2569; GFX940-NEXT: v_mov_b32_e32 v4, 0 2570; GFX940-NEXT: ;;#ASMSTART 2571; GFX940-NEXT: ; def v[0:1] 2572; GFX940-NEXT: ;;#ASMEND 2573; GFX940-NEXT: ;;#ASMSTART 2574; GFX940-NEXT: ; def v[2:3] 2575; GFX940-NEXT: ;;#ASMEND 2576; GFX940-NEXT: s_nop 0 2577; GFX940-NEXT: v_alignbit_b32 v1, v0, v3, 16 2578; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2579; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2580; GFX940-NEXT: s_waitcnt vmcnt(0) 2581; GFX940-NEXT: s_setpc_b64 s[30:31] 2582 %vec0 = call <4 x half> asm "; def $0", "=v"() 2583 %vec1 = call <4 x half> asm "; def $0", "=v"() 2584 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 2585 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2586 ret void 2587} 2588 2589define void @v_shuffle_v3f16_v4f16__7_2_1(ptr addrspace(1) inreg %ptr) { 2590; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_1: 2591; GFX900: ; %bb.0: 2592; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2593; GFX900-NEXT: v_mov_b32_e32 v4, 0 2594; GFX900-NEXT: ;;#ASMSTART 2595; GFX900-NEXT: ; def v[0:1] 2596; GFX900-NEXT: ;;#ASMEND 2597; GFX900-NEXT: ;;#ASMSTART 2598; GFX900-NEXT: ; def v[2:3] 2599; GFX900-NEXT: ;;#ASMEND 2600; GFX900-NEXT: v_alignbit_b32 v1, v1, v3, 16 2601; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2602; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 2603; GFX900-NEXT: s_waitcnt vmcnt(0) 2604; GFX900-NEXT: s_setpc_b64 s[30:31] 2605; 2606; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_1: 2607; GFX90A: ; %bb.0: 2608; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2609; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2610; GFX90A-NEXT: ;;#ASMSTART 2611; GFX90A-NEXT: ; def v[0:1] 2612; GFX90A-NEXT: ;;#ASMEND 2613; GFX90A-NEXT: ;;#ASMSTART 2614; GFX90A-NEXT: ; def v[2:3] 2615; GFX90A-NEXT: ;;#ASMEND 2616; GFX90A-NEXT: v_alignbit_b32 v1, v1, v3, 16 2617; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2618; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2619; GFX90A-NEXT: s_waitcnt vmcnt(0) 2620; GFX90A-NEXT: s_setpc_b64 s[30:31] 2621; 2622; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_1: 2623; GFX940: ; %bb.0: 2624; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2625; GFX940-NEXT: v_mov_b32_e32 v4, 0 2626; GFX940-NEXT: ;;#ASMSTART 2627; GFX940-NEXT: ; def v[0:1] 2628; GFX940-NEXT: ;;#ASMEND 2629; GFX940-NEXT: ;;#ASMSTART 2630; GFX940-NEXT: ; def v[2:3] 2631; GFX940-NEXT: ;;#ASMEND 2632; GFX940-NEXT: s_nop 0 2633; GFX940-NEXT: v_alignbit_b32 v1, v1, v3, 16 2634; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2635; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2636; GFX940-NEXT: s_waitcnt vmcnt(0) 2637; GFX940-NEXT: s_setpc_b64 s[30:31] 2638 %vec0 = call <4 x half> asm "; def $0", "=v"() 2639 %vec1 = call <4 x half> asm "; def $0", "=v"() 2640 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 2641 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2642 ret void 2643} 2644 2645define void @v_shuffle_v3f16_v4f16__7_3_1(ptr addrspace(1) inreg %ptr) { 2646; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_1: 2647; GFX900: ; %bb.0: 2648; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2649; GFX900-NEXT: v_mov_b32_e32 v4, 0 2650; GFX900-NEXT: ;;#ASMSTART 2651; GFX900-NEXT: ; def v[0:1] 2652; GFX900-NEXT: ;;#ASMEND 2653; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2654; GFX900-NEXT: ;;#ASMSTART 2655; GFX900-NEXT: ; def v[2:3] 2656; GFX900-NEXT: ;;#ASMEND 2657; GFX900-NEXT: v_perm_b32 v1, v1, v3, s4 2658; GFX900-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2659; GFX900-NEXT: global_store_dword v4, v1, s[16:17] 2660; GFX900-NEXT: s_waitcnt vmcnt(0) 2661; GFX900-NEXT: s_setpc_b64 s[30:31] 2662; 2663; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_1: 2664; GFX90A: ; %bb.0: 2665; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2666; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2667; GFX90A-NEXT: ;;#ASMSTART 2668; GFX90A-NEXT: ; def v[0:1] 2669; GFX90A-NEXT: ;;#ASMEND 2670; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2671; GFX90A-NEXT: ;;#ASMSTART 2672; GFX90A-NEXT: ; def v[2:3] 2673; GFX90A-NEXT: ;;#ASMEND 2674; GFX90A-NEXT: v_perm_b32 v1, v1, v3, s4 2675; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2676; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2677; GFX90A-NEXT: s_waitcnt vmcnt(0) 2678; GFX90A-NEXT: s_setpc_b64 s[30:31] 2679; 2680; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_1: 2681; GFX940: ; %bb.0: 2682; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2683; GFX940-NEXT: v_mov_b32_e32 v4, 0 2684; GFX940-NEXT: ;;#ASMSTART 2685; GFX940-NEXT: ; def v[0:1] 2686; GFX940-NEXT: ;;#ASMEND 2687; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2688; GFX940-NEXT: ;;#ASMSTART 2689; GFX940-NEXT: ; def v[2:3] 2690; GFX940-NEXT: ;;#ASMEND 2691; GFX940-NEXT: s_nop 0 2692; GFX940-NEXT: v_perm_b32 v1, v1, v3, s2 2693; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2694; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2695; GFX940-NEXT: s_waitcnt vmcnt(0) 2696; GFX940-NEXT: s_setpc_b64 s[30:31] 2697 %vec0 = call <4 x half> asm "; def $0", "=v"() 2698 %vec1 = call <4 x half> asm "; def $0", "=v"() 2699 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 2700 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2701 ret void 2702} 2703 2704define void @v_shuffle_v3f16_v4f16__7_4_1(ptr addrspace(1) inreg %ptr) { 2705; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_1: 2706; GFX900: ; %bb.0: 2707; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2708; GFX900-NEXT: ;;#ASMSTART 2709; GFX900-NEXT: ; def v[0:1] 2710; GFX900-NEXT: ;;#ASMEND 2711; GFX900-NEXT: v_mov_b32_e32 v3, 0 2712; GFX900-NEXT: ;;#ASMSTART 2713; GFX900-NEXT: ; def v[1:2] 2714; GFX900-NEXT: ;;#ASMEND 2715; GFX900-NEXT: v_alignbit_b32 v1, v1, v2, 16 2716; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2717; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2718; GFX900-NEXT: s_waitcnt vmcnt(0) 2719; GFX900-NEXT: s_setpc_b64 s[30:31] 2720; 2721; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_1: 2722; GFX90A: ; %bb.0: 2723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2724; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2725; GFX90A-NEXT: ;;#ASMSTART 2726; GFX90A-NEXT: ; def v[0:1] 2727; GFX90A-NEXT: ;;#ASMEND 2728; GFX90A-NEXT: ;;#ASMSTART 2729; GFX90A-NEXT: ; def v[2:3] 2730; GFX90A-NEXT: ;;#ASMEND 2731; GFX90A-NEXT: v_alignbit_b32 v1, v2, v3, 16 2732; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2733; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2734; GFX90A-NEXT: s_waitcnt vmcnt(0) 2735; GFX90A-NEXT: s_setpc_b64 s[30:31] 2736; 2737; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_1: 2738; GFX940: ; %bb.0: 2739; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2740; GFX940-NEXT: v_mov_b32_e32 v4, 0 2741; GFX940-NEXT: ;;#ASMSTART 2742; GFX940-NEXT: ; def v[0:1] 2743; GFX940-NEXT: ;;#ASMEND 2744; GFX940-NEXT: ;;#ASMSTART 2745; GFX940-NEXT: ; def v[2:3] 2746; GFX940-NEXT: ;;#ASMEND 2747; GFX940-NEXT: s_nop 0 2748; GFX940-NEXT: v_alignbit_b32 v1, v2, v3, 16 2749; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2750; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2751; GFX940-NEXT: s_waitcnt vmcnt(0) 2752; GFX940-NEXT: s_setpc_b64 s[30:31] 2753 %vec0 = call <4 x half> asm "; def $0", "=v"() 2754 %vec1 = call <4 x half> asm "; def $0", "=v"() 2755 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 2756 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2757 ret void 2758} 2759 2760define void @v_shuffle_v3f16_v4f16__7_5_1(ptr addrspace(1) inreg %ptr) { 2761; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_1: 2762; GFX900: ; %bb.0: 2763; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2764; GFX900-NEXT: ;;#ASMSTART 2765; GFX900-NEXT: ; def v[0:1] 2766; GFX900-NEXT: ;;#ASMEND 2767; GFX900-NEXT: v_mov_b32_e32 v3, 0 2768; GFX900-NEXT: ;;#ASMSTART 2769; GFX900-NEXT: ; def v[1:2] 2770; GFX900-NEXT: ;;#ASMEND 2771; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2772; GFX900-NEXT: v_perm_b32 v1, v1, v2, s4 2773; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2774; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2775; GFX900-NEXT: s_waitcnt vmcnt(0) 2776; GFX900-NEXT: s_setpc_b64 s[30:31] 2777; 2778; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_1: 2779; GFX90A: ; %bb.0: 2780; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2781; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2782; GFX90A-NEXT: ;;#ASMSTART 2783; GFX90A-NEXT: ; def v[0:1] 2784; GFX90A-NEXT: ;;#ASMEND 2785; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2786; GFX90A-NEXT: ;;#ASMSTART 2787; GFX90A-NEXT: ; def v[2:3] 2788; GFX90A-NEXT: ;;#ASMEND 2789; GFX90A-NEXT: v_perm_b32 v1, v2, v3, s4 2790; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2791; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2792; GFX90A-NEXT: s_waitcnt vmcnt(0) 2793; GFX90A-NEXT: s_setpc_b64 s[30:31] 2794; 2795; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_1: 2796; GFX940: ; %bb.0: 2797; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2798; GFX940-NEXT: v_mov_b32_e32 v4, 0 2799; GFX940-NEXT: ;;#ASMSTART 2800; GFX940-NEXT: ; def v[0:1] 2801; GFX940-NEXT: ;;#ASMEND 2802; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2803; GFX940-NEXT: ;;#ASMSTART 2804; GFX940-NEXT: ; def v[2:3] 2805; GFX940-NEXT: ;;#ASMEND 2806; GFX940-NEXT: s_nop 0 2807; GFX940-NEXT: v_perm_b32 v1, v2, v3, s2 2808; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2809; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2810; GFX940-NEXT: s_waitcnt vmcnt(0) 2811; GFX940-NEXT: s_setpc_b64 s[30:31] 2812 %vec0 = call <4 x half> asm "; def $0", "=v"() 2813 %vec1 = call <4 x half> asm "; def $0", "=v"() 2814 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 2815 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2816 ret void 2817} 2818 2819define void @v_shuffle_v3f16_v4f16__7_6_1(ptr addrspace(1) inreg %ptr) { 2820; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_1: 2821; GFX900: ; %bb.0: 2822; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2823; GFX900-NEXT: ;;#ASMSTART 2824; GFX900-NEXT: ; def v[0:1] 2825; GFX900-NEXT: ;;#ASMEND 2826; GFX900-NEXT: v_mov_b32_e32 v3, 0 2827; GFX900-NEXT: ;;#ASMSTART 2828; GFX900-NEXT: ; def v[1:2] 2829; GFX900-NEXT: ;;#ASMEND 2830; GFX900-NEXT: v_alignbit_b32 v1, v2, v2, 16 2831; GFX900-NEXT: global_store_short_d16_hi v3, v0, s[16:17] offset:4 2832; GFX900-NEXT: global_store_dword v3, v1, s[16:17] 2833; GFX900-NEXT: s_waitcnt vmcnt(0) 2834; GFX900-NEXT: s_setpc_b64 s[30:31] 2835; 2836; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_1: 2837; GFX90A: ; %bb.0: 2838; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2839; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2840; GFX90A-NEXT: ;;#ASMSTART 2841; GFX90A-NEXT: ; def v[0:1] 2842; GFX90A-NEXT: ;;#ASMEND 2843; GFX90A-NEXT: ;;#ASMSTART 2844; GFX90A-NEXT: ; def v[2:3] 2845; GFX90A-NEXT: ;;#ASMEND 2846; GFX90A-NEXT: v_alignbit_b32 v1, v3, v3, 16 2847; GFX90A-NEXT: global_store_short_d16_hi v4, v0, s[16:17] offset:4 2848; GFX90A-NEXT: global_store_dword v4, v1, s[16:17] 2849; GFX90A-NEXT: s_waitcnt vmcnt(0) 2850; GFX90A-NEXT: s_setpc_b64 s[30:31] 2851; 2852; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_1: 2853; GFX940: ; %bb.0: 2854; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2855; GFX940-NEXT: v_mov_b32_e32 v4, 0 2856; GFX940-NEXT: ;;#ASMSTART 2857; GFX940-NEXT: ; def v[0:1] 2858; GFX940-NEXT: ;;#ASMEND 2859; GFX940-NEXT: ;;#ASMSTART 2860; GFX940-NEXT: ; def v[2:3] 2861; GFX940-NEXT: ;;#ASMEND 2862; GFX940-NEXT: s_nop 0 2863; GFX940-NEXT: v_alignbit_b32 v1, v3, v3, 16 2864; GFX940-NEXT: global_store_short_d16_hi v4, v0, s[0:1] offset:4 sc0 sc1 2865; GFX940-NEXT: global_store_dword v4, v1, s[0:1] sc0 sc1 2866; GFX940-NEXT: s_waitcnt vmcnt(0) 2867; GFX940-NEXT: s_setpc_b64 s[30:31] 2868 %vec0 = call <4 x half> asm "; def $0", "=v"() 2869 %vec1 = call <4 x half> asm "; def $0", "=v"() 2870 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 2871 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2872 ret void 2873} 2874 2875define void @v_shuffle_v3f16_v4f16__u_2_2(ptr addrspace(1) inreg %ptr) { 2876; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_2_2: 2877; GFX900: ; %bb.0: 2878; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2879; GFX900-NEXT: v_mov_b32_e32 v2, 0 2880; GFX900-NEXT: ;;#ASMSTART 2881; GFX900-NEXT: ; def v[0:1] 2882; GFX900-NEXT: ;;#ASMEND 2883; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2884; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2885; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2886; GFX900-NEXT: s_waitcnt vmcnt(0) 2887; GFX900-NEXT: s_setpc_b64 s[30:31] 2888; 2889; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_2_2: 2890; GFX90A: ; %bb.0: 2891; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2892; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2893; GFX90A-NEXT: ;;#ASMSTART 2894; GFX90A-NEXT: ; def v[0:1] 2895; GFX90A-NEXT: ;;#ASMEND 2896; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2897; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2898; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2899; GFX90A-NEXT: s_waitcnt vmcnt(0) 2900; GFX90A-NEXT: s_setpc_b64 s[30:31] 2901; 2902; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_2_2: 2903; GFX940: ; %bb.0: 2904; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2905; GFX940-NEXT: v_mov_b32_e32 v2, 0 2906; GFX940-NEXT: ;;#ASMSTART 2907; GFX940-NEXT: ; def v[0:1] 2908; GFX940-NEXT: ;;#ASMEND 2909; GFX940-NEXT: s_nop 0 2910; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2911; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2912; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2913; GFX940-NEXT: s_waitcnt vmcnt(0) 2914; GFX940-NEXT: s_setpc_b64 s[30:31] 2915 %vec0 = call <4 x half> asm "; def $0", "=v"() 2916 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 2, i32 2> 2917 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2918 ret void 2919} 2920 2921define void @v_shuffle_v3f16_v4f16__0_2_2(ptr addrspace(1) inreg %ptr) { 2922; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_2_2: 2923; GFX900: ; %bb.0: 2924; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2925; GFX900-NEXT: v_mov_b32_e32 v2, 0 2926; GFX900-NEXT: ;;#ASMSTART 2927; GFX900-NEXT: ; def v[0:1] 2928; GFX900-NEXT: ;;#ASMEND 2929; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2930; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 2931; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2932; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2933; GFX900-NEXT: s_waitcnt vmcnt(0) 2934; GFX900-NEXT: s_setpc_b64 s[30:31] 2935; 2936; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_2_2: 2937; GFX90A: ; %bb.0: 2938; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2939; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2940; GFX90A-NEXT: ;;#ASMSTART 2941; GFX90A-NEXT: ; def v[0:1] 2942; GFX90A-NEXT: ;;#ASMEND 2943; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2944; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 2945; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2946; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2947; GFX90A-NEXT: s_waitcnt vmcnt(0) 2948; GFX90A-NEXT: s_setpc_b64 s[30:31] 2949; 2950; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_2_2: 2951; GFX940: ; %bb.0: 2952; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2953; GFX940-NEXT: v_mov_b32_e32 v2, 0 2954; GFX940-NEXT: ;;#ASMSTART 2955; GFX940-NEXT: ; def v[0:1] 2956; GFX940-NEXT: ;;#ASMEND 2957; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2958; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 2959; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 2960; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2961; GFX940-NEXT: s_waitcnt vmcnt(0) 2962; GFX940-NEXT: s_setpc_b64 s[30:31] 2963 %vec0 = call <4 x half> asm "; def $0", "=v"() 2964 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 2, i32 2> 2965 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 2966 ret void 2967} 2968 2969define void @v_shuffle_v3f16_v4f16__1_2_2(ptr addrspace(1) inreg %ptr) { 2970; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_2_2: 2971; GFX900: ; %bb.0: 2972; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2973; GFX900-NEXT: v_mov_b32_e32 v2, 0 2974; GFX900-NEXT: ;;#ASMSTART 2975; GFX900-NEXT: ; def v[0:1] 2976; GFX900-NEXT: ;;#ASMEND 2977; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 2978; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 2979; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2980; GFX900-NEXT: s_waitcnt vmcnt(0) 2981; GFX900-NEXT: s_setpc_b64 s[30:31] 2982; 2983; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_2_2: 2984; GFX90A: ; %bb.0: 2985; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2986; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2987; GFX90A-NEXT: ;;#ASMSTART 2988; GFX90A-NEXT: ; def v[0:1] 2989; GFX90A-NEXT: ;;#ASMEND 2990; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 2991; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 2992; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2993; GFX90A-NEXT: s_waitcnt vmcnt(0) 2994; GFX90A-NEXT: s_setpc_b64 s[30:31] 2995; 2996; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_2_2: 2997; GFX940: ; %bb.0: 2998; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2999; GFX940-NEXT: v_mov_b32_e32 v2, 0 3000; GFX940-NEXT: ;;#ASMSTART 3001; GFX940-NEXT: ; def v[0:1] 3002; GFX940-NEXT: ;;#ASMEND 3003; GFX940-NEXT: s_nop 0 3004; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 3005; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3006; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3007; GFX940-NEXT: s_waitcnt vmcnt(0) 3008; GFX940-NEXT: s_setpc_b64 s[30:31] 3009 %vec0 = call <4 x half> asm "; def $0", "=v"() 3010 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 2, i32 2> 3011 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3012 ret void 3013} 3014 3015define void @v_shuffle_v3f16_v4f16__2_2_2(ptr addrspace(1) inreg %ptr) { 3016; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_2_2: 3017; GFX900: ; %bb.0: 3018; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3019; GFX900-NEXT: v_mov_b32_e32 v2, 0 3020; GFX900-NEXT: ;;#ASMSTART 3021; GFX900-NEXT: ; def v[0:1] 3022; GFX900-NEXT: ;;#ASMEND 3023; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3024; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3025; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3026; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3027; GFX900-NEXT: s_waitcnt vmcnt(0) 3028; GFX900-NEXT: s_setpc_b64 s[30:31] 3029; 3030; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_2_2: 3031; GFX90A: ; %bb.0: 3032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3033; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3034; GFX90A-NEXT: ;;#ASMSTART 3035; GFX90A-NEXT: ; def v[0:1] 3036; GFX90A-NEXT: ;;#ASMEND 3037; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3038; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3039; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3040; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3041; GFX90A-NEXT: s_waitcnt vmcnt(0) 3042; GFX90A-NEXT: s_setpc_b64 s[30:31] 3043; 3044; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_2_2: 3045; GFX940: ; %bb.0: 3046; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3047; GFX940-NEXT: v_mov_b32_e32 v2, 0 3048; GFX940-NEXT: ;;#ASMSTART 3049; GFX940-NEXT: ; def v[0:1] 3050; GFX940-NEXT: ;;#ASMEND 3051; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3052; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3053; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3054; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3055; GFX940-NEXT: s_waitcnt vmcnt(0) 3056; GFX940-NEXT: s_setpc_b64 s[30:31] 3057 %vec0 = call <4 x half> asm "; def $0", "=v"() 3058 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 2, i32 2> 3059 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3060 ret void 3061} 3062 3063define void @v_shuffle_v3f16_v4f16__3_2_2(ptr addrspace(1) inreg %ptr) { 3064; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_2_2: 3065; GFX900: ; %bb.0: 3066; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3067; GFX900-NEXT: v_mov_b32_e32 v2, 0 3068; GFX900-NEXT: ;;#ASMSTART 3069; GFX900-NEXT: ; def v[0:1] 3070; GFX900-NEXT: ;;#ASMEND 3071; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 3072; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3073; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3074; GFX900-NEXT: s_waitcnt vmcnt(0) 3075; GFX900-NEXT: s_setpc_b64 s[30:31] 3076; 3077; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_2_2: 3078; GFX90A: ; %bb.0: 3079; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3080; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3081; GFX90A-NEXT: ;;#ASMSTART 3082; GFX90A-NEXT: ; def v[0:1] 3083; GFX90A-NEXT: ;;#ASMEND 3084; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 3085; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3086; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3087; GFX90A-NEXT: s_waitcnt vmcnt(0) 3088; GFX90A-NEXT: s_setpc_b64 s[30:31] 3089; 3090; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_2_2: 3091; GFX940: ; %bb.0: 3092; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3093; GFX940-NEXT: v_mov_b32_e32 v2, 0 3094; GFX940-NEXT: ;;#ASMSTART 3095; GFX940-NEXT: ; def v[0:1] 3096; GFX940-NEXT: ;;#ASMEND 3097; GFX940-NEXT: s_nop 0 3098; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 3099; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3100; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3101; GFX940-NEXT: s_waitcnt vmcnt(0) 3102; GFX940-NEXT: s_setpc_b64 s[30:31] 3103 %vec0 = call <4 x half> asm "; def $0", "=v"() 3104 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 2, i32 2> 3105 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3106 ret void 3107} 3108 3109define void @v_shuffle_v3f16_v4f16__4_2_2(ptr addrspace(1) inreg %ptr) { 3110; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_2_2: 3111; GFX900: ; %bb.0: 3112; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3113; GFX900-NEXT: v_mov_b32_e32 v2, 0 3114; GFX900-NEXT: ;;#ASMSTART 3115; GFX900-NEXT: ; def v[0:1] 3116; GFX900-NEXT: ;;#ASMEND 3117; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3118; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3119; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3120; GFX900-NEXT: s_waitcnt vmcnt(0) 3121; GFX900-NEXT: s_setpc_b64 s[30:31] 3122; 3123; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_2_2: 3124; GFX90A: ; %bb.0: 3125; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3126; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3127; GFX90A-NEXT: ;;#ASMSTART 3128; GFX90A-NEXT: ; def v[0:1] 3129; GFX90A-NEXT: ;;#ASMEND 3130; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3131; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3132; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3133; GFX90A-NEXT: s_waitcnt vmcnt(0) 3134; GFX90A-NEXT: s_setpc_b64 s[30:31] 3135; 3136; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_2_2: 3137; GFX940: ; %bb.0: 3138; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3139; GFX940-NEXT: v_mov_b32_e32 v2, 0 3140; GFX940-NEXT: ;;#ASMSTART 3141; GFX940-NEXT: ; def v[0:1] 3142; GFX940-NEXT: ;;#ASMEND 3143; GFX940-NEXT: s_nop 0 3144; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 3145; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3146; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3147; GFX940-NEXT: s_waitcnt vmcnt(0) 3148; GFX940-NEXT: s_setpc_b64 s[30:31] 3149 %vec0 = call <4 x half> asm "; def $0", "=v"() 3150 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 2, i32 2> 3151 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3152 ret void 3153} 3154 3155define void @v_shuffle_v3f16_v4f16__5_2_2(ptr addrspace(1) inreg %ptr) { 3156; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_2_2: 3157; GFX900: ; %bb.0: 3158; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3159; GFX900-NEXT: v_mov_b32_e32 v4, 0 3160; GFX900-NEXT: ;;#ASMSTART 3161; GFX900-NEXT: ; def v[0:1] 3162; GFX900-NEXT: ;;#ASMEND 3163; GFX900-NEXT: ;;#ASMSTART 3164; GFX900-NEXT: ; def v[2:3] 3165; GFX900-NEXT: ;;#ASMEND 3166; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 3167; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3168; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3169; GFX900-NEXT: s_waitcnt vmcnt(0) 3170; GFX900-NEXT: s_setpc_b64 s[30:31] 3171; 3172; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_2_2: 3173; GFX90A: ; %bb.0: 3174; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3175; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3176; GFX90A-NEXT: ;;#ASMSTART 3177; GFX90A-NEXT: ; def v[0:1] 3178; GFX90A-NEXT: ;;#ASMEND 3179; GFX90A-NEXT: ;;#ASMSTART 3180; GFX90A-NEXT: ; def v[2:3] 3181; GFX90A-NEXT: ;;#ASMEND 3182; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 3183; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3184; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3185; GFX90A-NEXT: s_waitcnt vmcnt(0) 3186; GFX90A-NEXT: s_setpc_b64 s[30:31] 3187; 3188; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_2_2: 3189; GFX940: ; %bb.0: 3190; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3191; GFX940-NEXT: v_mov_b32_e32 v4, 0 3192; GFX940-NEXT: ;;#ASMSTART 3193; GFX940-NEXT: ; def v[0:1] 3194; GFX940-NEXT: ;;#ASMEND 3195; GFX940-NEXT: ;;#ASMSTART 3196; GFX940-NEXT: ; def v[2:3] 3197; GFX940-NEXT: ;;#ASMEND 3198; GFX940-NEXT: s_nop 0 3199; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 3200; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3201; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3202; GFX940-NEXT: s_waitcnt vmcnt(0) 3203; GFX940-NEXT: s_setpc_b64 s[30:31] 3204 %vec0 = call <4 x half> asm "; def $0", "=v"() 3205 %vec1 = call <4 x half> asm "; def $0", "=v"() 3206 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 3207 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3208 ret void 3209} 3210 3211define void @v_shuffle_v3f16_v4f16__6_2_2(ptr addrspace(1) inreg %ptr) { 3212; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_2_2: 3213; GFX900: ; %bb.0: 3214; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3215; GFX900-NEXT: v_mov_b32_e32 v4, 0 3216; GFX900-NEXT: ;;#ASMSTART 3217; GFX900-NEXT: ; def v[0:1] 3218; GFX900-NEXT: ;;#ASMEND 3219; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3220; GFX900-NEXT: ;;#ASMSTART 3221; GFX900-NEXT: ; def v[2:3] 3222; GFX900-NEXT: ;;#ASMEND 3223; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3224; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3225; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3226; GFX900-NEXT: s_waitcnt vmcnt(0) 3227; GFX900-NEXT: s_setpc_b64 s[30:31] 3228; 3229; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_2_2: 3230; GFX90A: ; %bb.0: 3231; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3232; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3233; GFX90A-NEXT: ;;#ASMSTART 3234; GFX90A-NEXT: ; def v[0:1] 3235; GFX90A-NEXT: ;;#ASMEND 3236; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3237; GFX90A-NEXT: ;;#ASMSTART 3238; GFX90A-NEXT: ; def v[2:3] 3239; GFX90A-NEXT: ;;#ASMEND 3240; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3241; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3242; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3243; GFX90A-NEXT: s_waitcnt vmcnt(0) 3244; GFX90A-NEXT: s_setpc_b64 s[30:31] 3245; 3246; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_2_2: 3247; GFX940: ; %bb.0: 3248; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3249; GFX940-NEXT: v_mov_b32_e32 v4, 0 3250; GFX940-NEXT: ;;#ASMSTART 3251; GFX940-NEXT: ; def v[0:1] 3252; GFX940-NEXT: ;;#ASMEND 3253; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3254; GFX940-NEXT: ;;#ASMSTART 3255; GFX940-NEXT: ; def v[2:3] 3256; GFX940-NEXT: ;;#ASMEND 3257; GFX940-NEXT: s_nop 0 3258; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3259; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3260; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3261; GFX940-NEXT: s_waitcnt vmcnt(0) 3262; GFX940-NEXT: s_setpc_b64 s[30:31] 3263 %vec0 = call <4 x half> asm "; def $0", "=v"() 3264 %vec1 = call <4 x half> asm "; def $0", "=v"() 3265 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 2, i32 2> 3266 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3267 ret void 3268} 3269 3270define void @v_shuffle_v3f16_v4f16__7_2_2(ptr addrspace(1) inreg %ptr) { 3271; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_2: 3272; GFX900: ; %bb.0: 3273; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3274; GFX900-NEXT: v_mov_b32_e32 v4, 0 3275; GFX900-NEXT: ;;#ASMSTART 3276; GFX900-NEXT: ; def v[0:1] 3277; GFX900-NEXT: ;;#ASMEND 3278; GFX900-NEXT: ;;#ASMSTART 3279; GFX900-NEXT: ; def v[2:3] 3280; GFX900-NEXT: ;;#ASMEND 3281; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 3282; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3283; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3284; GFX900-NEXT: s_waitcnt vmcnt(0) 3285; GFX900-NEXT: s_setpc_b64 s[30:31] 3286; 3287; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_2: 3288; GFX90A: ; %bb.0: 3289; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3290; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3291; GFX90A-NEXT: ;;#ASMSTART 3292; GFX90A-NEXT: ; def v[0:1] 3293; GFX90A-NEXT: ;;#ASMEND 3294; GFX90A-NEXT: ;;#ASMSTART 3295; GFX90A-NEXT: ; def v[2:3] 3296; GFX90A-NEXT: ;;#ASMEND 3297; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 3298; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3299; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3300; GFX90A-NEXT: s_waitcnt vmcnt(0) 3301; GFX90A-NEXT: s_setpc_b64 s[30:31] 3302; 3303; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_2: 3304; GFX940: ; %bb.0: 3305; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3306; GFX940-NEXT: v_mov_b32_e32 v4, 0 3307; GFX940-NEXT: ;;#ASMSTART 3308; GFX940-NEXT: ; def v[0:1] 3309; GFX940-NEXT: ;;#ASMEND 3310; GFX940-NEXT: ;;#ASMSTART 3311; GFX940-NEXT: ; def v[2:3] 3312; GFX940-NEXT: ;;#ASMEND 3313; GFX940-NEXT: s_nop 0 3314; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 3315; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3316; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3317; GFX940-NEXT: s_waitcnt vmcnt(0) 3318; GFX940-NEXT: s_setpc_b64 s[30:31] 3319 %vec0 = call <4 x half> asm "; def $0", "=v"() 3320 %vec1 = call <4 x half> asm "; def $0", "=v"() 3321 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 3322 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3323 ret void 3324} 3325 3326define void @v_shuffle_v3f16_v4f16__7_u_2(ptr addrspace(1) inreg %ptr) { 3327; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_2: 3328; GFX900: ; %bb.0: 3329; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3330; GFX900-NEXT: v_mov_b32_e32 v4, 0 3331; GFX900-NEXT: ;;#ASMSTART 3332; GFX900-NEXT: ; def v[0:1] 3333; GFX900-NEXT: ;;#ASMEND 3334; GFX900-NEXT: ;;#ASMSTART 3335; GFX900-NEXT: ; def v[2:3] 3336; GFX900-NEXT: ;;#ASMEND 3337; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 3338; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3339; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3340; GFX900-NEXT: s_waitcnt vmcnt(0) 3341; GFX900-NEXT: s_setpc_b64 s[30:31] 3342; 3343; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_2: 3344; GFX90A: ; %bb.0: 3345; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3346; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3347; GFX90A-NEXT: ;;#ASMSTART 3348; GFX90A-NEXT: ; def v[0:1] 3349; GFX90A-NEXT: ;;#ASMEND 3350; GFX90A-NEXT: ;;#ASMSTART 3351; GFX90A-NEXT: ; def v[2:3] 3352; GFX90A-NEXT: ;;#ASMEND 3353; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 3354; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3355; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3356; GFX90A-NEXT: s_waitcnt vmcnt(0) 3357; GFX90A-NEXT: s_setpc_b64 s[30:31] 3358; 3359; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_2: 3360; GFX940: ; %bb.0: 3361; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3362; GFX940-NEXT: v_mov_b32_e32 v4, 0 3363; GFX940-NEXT: ;;#ASMSTART 3364; GFX940-NEXT: ; def v[0:1] 3365; GFX940-NEXT: ;;#ASMEND 3366; GFX940-NEXT: ;;#ASMSTART 3367; GFX940-NEXT: ; def v[2:3] 3368; GFX940-NEXT: ;;#ASMEND 3369; GFX940-NEXT: s_nop 0 3370; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 3371; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3372; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3373; GFX940-NEXT: s_waitcnt vmcnt(0) 3374; GFX940-NEXT: s_setpc_b64 s[30:31] 3375 %vec0 = call <4 x half> asm "; def $0", "=v"() 3376 %vec1 = call <4 x half> asm "; def $0", "=v"() 3377 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 3378 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3379 ret void 3380} 3381 3382define void @v_shuffle_v3f16_v4f16__7_0_2(ptr addrspace(1) inreg %ptr) { 3383; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_2: 3384; GFX900: ; %bb.0: 3385; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3386; GFX900-NEXT: v_mov_b32_e32 v4, 0 3387; GFX900-NEXT: ;;#ASMSTART 3388; GFX900-NEXT: ; def v[0:1] 3389; GFX900-NEXT: ;;#ASMEND 3390; GFX900-NEXT: ;;#ASMSTART 3391; GFX900-NEXT: ; def v[2:3] 3392; GFX900-NEXT: ;;#ASMEND 3393; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 3394; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3395; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3396; GFX900-NEXT: s_waitcnt vmcnt(0) 3397; GFX900-NEXT: s_setpc_b64 s[30:31] 3398; 3399; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_2: 3400; GFX90A: ; %bb.0: 3401; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3402; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3403; GFX90A-NEXT: ;;#ASMSTART 3404; GFX90A-NEXT: ; def v[0:1] 3405; GFX90A-NEXT: ;;#ASMEND 3406; GFX90A-NEXT: ;;#ASMSTART 3407; GFX90A-NEXT: ; def v[2:3] 3408; GFX90A-NEXT: ;;#ASMEND 3409; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 3410; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3411; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3412; GFX90A-NEXT: s_waitcnt vmcnt(0) 3413; GFX90A-NEXT: s_setpc_b64 s[30:31] 3414; 3415; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_2: 3416; GFX940: ; %bb.0: 3417; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3418; GFX940-NEXT: v_mov_b32_e32 v4, 0 3419; GFX940-NEXT: ;;#ASMSTART 3420; GFX940-NEXT: ; def v[0:1] 3421; GFX940-NEXT: ;;#ASMEND 3422; GFX940-NEXT: ;;#ASMSTART 3423; GFX940-NEXT: ; def v[2:3] 3424; GFX940-NEXT: ;;#ASMEND 3425; GFX940-NEXT: s_nop 0 3426; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 3427; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3428; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3429; GFX940-NEXT: s_waitcnt vmcnt(0) 3430; GFX940-NEXT: s_setpc_b64 s[30:31] 3431 %vec0 = call <4 x half> asm "; def $0", "=v"() 3432 %vec1 = call <4 x half> asm "; def $0", "=v"() 3433 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 3434 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3435 ret void 3436} 3437 3438define void @v_shuffle_v3f16_v4f16__7_1_2(ptr addrspace(1) inreg %ptr) { 3439; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_2: 3440; GFX900: ; %bb.0: 3441; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3442; GFX900-NEXT: v_mov_b32_e32 v4, 0 3443; GFX900-NEXT: ;;#ASMSTART 3444; GFX900-NEXT: ; def v[0:1] 3445; GFX900-NEXT: ;;#ASMEND 3446; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3447; GFX900-NEXT: ;;#ASMSTART 3448; GFX900-NEXT: ; def v[2:3] 3449; GFX900-NEXT: ;;#ASMEND 3450; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 3451; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3452; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3453; GFX900-NEXT: s_waitcnt vmcnt(0) 3454; GFX900-NEXT: s_setpc_b64 s[30:31] 3455; 3456; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_2: 3457; GFX90A: ; %bb.0: 3458; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3459; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3460; GFX90A-NEXT: ;;#ASMSTART 3461; GFX90A-NEXT: ; def v[0:1] 3462; GFX90A-NEXT: ;;#ASMEND 3463; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3464; GFX90A-NEXT: ;;#ASMSTART 3465; GFX90A-NEXT: ; def v[2:3] 3466; GFX90A-NEXT: ;;#ASMEND 3467; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 3468; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3469; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3470; GFX90A-NEXT: s_waitcnt vmcnt(0) 3471; GFX90A-NEXT: s_setpc_b64 s[30:31] 3472; 3473; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_2: 3474; GFX940: ; %bb.0: 3475; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3476; GFX940-NEXT: v_mov_b32_e32 v4, 0 3477; GFX940-NEXT: ;;#ASMSTART 3478; GFX940-NEXT: ; def v[0:1] 3479; GFX940-NEXT: ;;#ASMEND 3480; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3481; GFX940-NEXT: ;;#ASMSTART 3482; GFX940-NEXT: ; def v[2:3] 3483; GFX940-NEXT: ;;#ASMEND 3484; GFX940-NEXT: s_nop 0 3485; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 3486; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3487; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3488; GFX940-NEXT: s_waitcnt vmcnt(0) 3489; GFX940-NEXT: s_setpc_b64 s[30:31] 3490 %vec0 = call <4 x half> asm "; def $0", "=v"() 3491 %vec1 = call <4 x half> asm "; def $0", "=v"() 3492 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 3493 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3494 ret void 3495} 3496 3497define void @v_shuffle_v3f16_v4f16__7_3_2(ptr addrspace(1) inreg %ptr) { 3498; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_2: 3499; GFX900: ; %bb.0: 3500; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3501; GFX900-NEXT: v_mov_b32_e32 v4, 0 3502; GFX900-NEXT: ;;#ASMSTART 3503; GFX900-NEXT: ; def v[0:1] 3504; GFX900-NEXT: ;;#ASMEND 3505; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3506; GFX900-NEXT: ;;#ASMSTART 3507; GFX900-NEXT: ; def v[2:3] 3508; GFX900-NEXT: ;;#ASMEND 3509; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 3510; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3511; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3512; GFX900-NEXT: s_waitcnt vmcnt(0) 3513; GFX900-NEXT: s_setpc_b64 s[30:31] 3514; 3515; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_2: 3516; GFX90A: ; %bb.0: 3517; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3518; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3519; GFX90A-NEXT: ;;#ASMSTART 3520; GFX90A-NEXT: ; def v[0:1] 3521; GFX90A-NEXT: ;;#ASMEND 3522; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3523; GFX90A-NEXT: ;;#ASMSTART 3524; GFX90A-NEXT: ; def v[2:3] 3525; GFX90A-NEXT: ;;#ASMEND 3526; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 3527; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3528; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3529; GFX90A-NEXT: s_waitcnt vmcnt(0) 3530; GFX90A-NEXT: s_setpc_b64 s[30:31] 3531; 3532; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_2: 3533; GFX940: ; %bb.0: 3534; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3535; GFX940-NEXT: v_mov_b32_e32 v4, 0 3536; GFX940-NEXT: ;;#ASMSTART 3537; GFX940-NEXT: ; def v[0:1] 3538; GFX940-NEXT: ;;#ASMEND 3539; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3540; GFX940-NEXT: ;;#ASMSTART 3541; GFX940-NEXT: ; def v[2:3] 3542; GFX940-NEXT: ;;#ASMEND 3543; GFX940-NEXT: s_nop 0 3544; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 3545; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3546; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3547; GFX940-NEXT: s_waitcnt vmcnt(0) 3548; GFX940-NEXT: s_setpc_b64 s[30:31] 3549 %vec0 = call <4 x half> asm "; def $0", "=v"() 3550 %vec1 = call <4 x half> asm "; def $0", "=v"() 3551 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 3552 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3553 ret void 3554} 3555 3556define void @v_shuffle_v3f16_v4f16__7_4_2(ptr addrspace(1) inreg %ptr) { 3557; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_2: 3558; GFX900: ; %bb.0: 3559; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3560; GFX900-NEXT: v_mov_b32_e32 v4, 0 3561; GFX900-NEXT: ;;#ASMSTART 3562; GFX900-NEXT: ; def v[0:1] 3563; GFX900-NEXT: ;;#ASMEND 3564; GFX900-NEXT: ;;#ASMSTART 3565; GFX900-NEXT: ; def v[2:3] 3566; GFX900-NEXT: ;;#ASMEND 3567; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 3568; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3569; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3570; GFX900-NEXT: s_waitcnt vmcnt(0) 3571; GFX900-NEXT: s_setpc_b64 s[30:31] 3572; 3573; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_2: 3574; GFX90A: ; %bb.0: 3575; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3576; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3577; GFX90A-NEXT: ;;#ASMSTART 3578; GFX90A-NEXT: ; def v[0:1] 3579; GFX90A-NEXT: ;;#ASMEND 3580; GFX90A-NEXT: ;;#ASMSTART 3581; GFX90A-NEXT: ; def v[2:3] 3582; GFX90A-NEXT: ;;#ASMEND 3583; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 3584; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3585; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3586; GFX90A-NEXT: s_waitcnt vmcnt(0) 3587; GFX90A-NEXT: s_setpc_b64 s[30:31] 3588; 3589; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_2: 3590; GFX940: ; %bb.0: 3591; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3592; GFX940-NEXT: v_mov_b32_e32 v4, 0 3593; GFX940-NEXT: ;;#ASMSTART 3594; GFX940-NEXT: ; def v[0:1] 3595; GFX940-NEXT: ;;#ASMEND 3596; GFX940-NEXT: ;;#ASMSTART 3597; GFX940-NEXT: ; def v[2:3] 3598; GFX940-NEXT: ;;#ASMEND 3599; GFX940-NEXT: s_nop 0 3600; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 3601; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3602; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3603; GFX940-NEXT: s_waitcnt vmcnt(0) 3604; GFX940-NEXT: s_setpc_b64 s[30:31] 3605 %vec0 = call <4 x half> asm "; def $0", "=v"() 3606 %vec1 = call <4 x half> asm "; def $0", "=v"() 3607 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 3608 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3609 ret void 3610} 3611 3612define void @v_shuffle_v3f16_v4f16__7_5_2(ptr addrspace(1) inreg %ptr) { 3613; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_2: 3614; GFX900: ; %bb.0: 3615; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3616; GFX900-NEXT: v_mov_b32_e32 v4, 0 3617; GFX900-NEXT: ;;#ASMSTART 3618; GFX900-NEXT: ; def v[0:1] 3619; GFX900-NEXT: ;;#ASMEND 3620; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3621; GFX900-NEXT: ;;#ASMSTART 3622; GFX900-NEXT: ; def v[2:3] 3623; GFX900-NEXT: ;;#ASMEND 3624; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 3625; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3626; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3627; GFX900-NEXT: s_waitcnt vmcnt(0) 3628; GFX900-NEXT: s_setpc_b64 s[30:31] 3629; 3630; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_2: 3631; GFX90A: ; %bb.0: 3632; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3633; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3634; GFX90A-NEXT: ;;#ASMSTART 3635; GFX90A-NEXT: ; def v[0:1] 3636; GFX90A-NEXT: ;;#ASMEND 3637; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3638; GFX90A-NEXT: ;;#ASMSTART 3639; GFX90A-NEXT: ; def v[2:3] 3640; GFX90A-NEXT: ;;#ASMEND 3641; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 3642; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3643; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3644; GFX90A-NEXT: s_waitcnt vmcnt(0) 3645; GFX90A-NEXT: s_setpc_b64 s[30:31] 3646; 3647; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_2: 3648; GFX940: ; %bb.0: 3649; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3650; GFX940-NEXT: v_mov_b32_e32 v4, 0 3651; GFX940-NEXT: ;;#ASMSTART 3652; GFX940-NEXT: ; def v[0:1] 3653; GFX940-NEXT: ;;#ASMEND 3654; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3655; GFX940-NEXT: ;;#ASMSTART 3656; GFX940-NEXT: ; def v[2:3] 3657; GFX940-NEXT: ;;#ASMEND 3658; GFX940-NEXT: s_nop 0 3659; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 3660; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3661; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3662; GFX940-NEXT: s_waitcnt vmcnt(0) 3663; GFX940-NEXT: s_setpc_b64 s[30:31] 3664 %vec0 = call <4 x half> asm "; def $0", "=v"() 3665 %vec1 = call <4 x half> asm "; def $0", "=v"() 3666 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 3667 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3668 ret void 3669} 3670 3671define void @v_shuffle_v3f16_v4f16__7_6_2(ptr addrspace(1) inreg %ptr) { 3672; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_2: 3673; GFX900: ; %bb.0: 3674; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3675; GFX900-NEXT: v_mov_b32_e32 v4, 0 3676; GFX900-NEXT: ;;#ASMSTART 3677; GFX900-NEXT: ; def v[0:1] 3678; GFX900-NEXT: ;;#ASMEND 3679; GFX900-NEXT: ;;#ASMSTART 3680; GFX900-NEXT: ; def v[2:3] 3681; GFX900-NEXT: ;;#ASMEND 3682; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 3683; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 3684; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3685; GFX900-NEXT: s_waitcnt vmcnt(0) 3686; GFX900-NEXT: s_setpc_b64 s[30:31] 3687; 3688; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_2: 3689; GFX90A: ; %bb.0: 3690; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3691; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3692; GFX90A-NEXT: ;;#ASMSTART 3693; GFX90A-NEXT: ; def v[0:1] 3694; GFX90A-NEXT: ;;#ASMEND 3695; GFX90A-NEXT: ;;#ASMSTART 3696; GFX90A-NEXT: ; def v[2:3] 3697; GFX90A-NEXT: ;;#ASMEND 3698; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 3699; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 3700; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3701; GFX90A-NEXT: s_waitcnt vmcnt(0) 3702; GFX90A-NEXT: s_setpc_b64 s[30:31] 3703; 3704; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_2: 3705; GFX940: ; %bb.0: 3706; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3707; GFX940-NEXT: v_mov_b32_e32 v4, 0 3708; GFX940-NEXT: ;;#ASMSTART 3709; GFX940-NEXT: ; def v[0:1] 3710; GFX940-NEXT: ;;#ASMEND 3711; GFX940-NEXT: ;;#ASMSTART 3712; GFX940-NEXT: ; def v[2:3] 3713; GFX940-NEXT: ;;#ASMEND 3714; GFX940-NEXT: s_nop 0 3715; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 3716; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 3717; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3718; GFX940-NEXT: s_waitcnt vmcnt(0) 3719; GFX940-NEXT: s_setpc_b64 s[30:31] 3720 %vec0 = call <4 x half> asm "; def $0", "=v"() 3721 %vec1 = call <4 x half> asm "; def $0", "=v"() 3722 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 3723 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3724 ret void 3725} 3726 3727define void @v_shuffle_v3f16_v4f16__u_3_3(ptr addrspace(1) inreg %ptr) { 3728; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_3_3: 3729; GFX900: ; %bb.0: 3730; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3731; GFX900-NEXT: ;;#ASMSTART 3732; GFX900-NEXT: ; def v[0:1] 3733; GFX900-NEXT: ;;#ASMEND 3734; GFX900-NEXT: s_mov_b32 s4, 0xffff 3735; GFX900-NEXT: v_mov_b32_e32 v2, 0 3736; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3737; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3738; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3739; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3740; GFX900-NEXT: s_waitcnt vmcnt(0) 3741; GFX900-NEXT: s_setpc_b64 s[30:31] 3742; 3743; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_3_3: 3744; GFX90A: ; %bb.0: 3745; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3746; GFX90A-NEXT: ;;#ASMSTART 3747; GFX90A-NEXT: ; def v[0:1] 3748; GFX90A-NEXT: ;;#ASMEND 3749; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3750; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3751; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 3752; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3753; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3754; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3755; GFX90A-NEXT: s_waitcnt vmcnt(0) 3756; GFX90A-NEXT: s_setpc_b64 s[30:31] 3757; 3758; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_3_3: 3759; GFX940: ; %bb.0: 3760; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3761; GFX940-NEXT: ;;#ASMSTART 3762; GFX940-NEXT: ; def v[0:1] 3763; GFX940-NEXT: ;;#ASMEND 3764; GFX940-NEXT: s_mov_b32 s2, 0xffff 3765; GFX940-NEXT: v_mov_b32_e32 v2, 0 3766; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 3767; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3768; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3769; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3770; GFX940-NEXT: s_waitcnt vmcnt(0) 3771; GFX940-NEXT: s_setpc_b64 s[30:31] 3772 %vec0 = call <4 x half> asm "; def $0", "=v"() 3773 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 3, i32 3> 3774 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3775 ret void 3776} 3777 3778define void @v_shuffle_v3f16_v4f16__0_3_3(ptr addrspace(1) inreg %ptr) { 3779; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_3_3: 3780; GFX900: ; %bb.0: 3781; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3782; GFX900-NEXT: ;;#ASMSTART 3783; GFX900-NEXT: ; def v[0:1] 3784; GFX900-NEXT: ;;#ASMEND 3785; GFX900-NEXT: s_mov_b32 s4, 0xffff 3786; GFX900-NEXT: v_mov_b32_e32 v2, 0 3787; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3788; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3789; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3790; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3791; GFX900-NEXT: s_waitcnt vmcnt(0) 3792; GFX900-NEXT: s_setpc_b64 s[30:31] 3793; 3794; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_3_3: 3795; GFX90A: ; %bb.0: 3796; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3797; GFX90A-NEXT: ;;#ASMSTART 3798; GFX90A-NEXT: ; def v[0:1] 3799; GFX90A-NEXT: ;;#ASMEND 3800; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3801; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3802; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 3803; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3804; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3805; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3806; GFX90A-NEXT: s_waitcnt vmcnt(0) 3807; GFX90A-NEXT: s_setpc_b64 s[30:31] 3808; 3809; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_3_3: 3810; GFX940: ; %bb.0: 3811; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3812; GFX940-NEXT: ;;#ASMSTART 3813; GFX940-NEXT: ; def v[0:1] 3814; GFX940-NEXT: ;;#ASMEND 3815; GFX940-NEXT: s_mov_b32 s2, 0xffff 3816; GFX940-NEXT: v_mov_b32_e32 v2, 0 3817; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 3818; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3819; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3820; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3821; GFX940-NEXT: s_waitcnt vmcnt(0) 3822; GFX940-NEXT: s_setpc_b64 s[30:31] 3823 %vec0 = call <4 x half> asm "; def $0", "=v"() 3824 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 3, i32 3> 3825 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3826 ret void 3827} 3828 3829define void @v_shuffle_v3f16_v4f16__1_3_3(ptr addrspace(1) inreg %ptr) { 3830; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_3_3: 3831; GFX900: ; %bb.0: 3832; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3833; GFX900-NEXT: ;;#ASMSTART 3834; GFX900-NEXT: ; def v[0:1] 3835; GFX900-NEXT: ;;#ASMEND 3836; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3837; GFX900-NEXT: v_mov_b32_e32 v2, 0 3838; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3839; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3840; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3841; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3842; GFX900-NEXT: s_waitcnt vmcnt(0) 3843; GFX900-NEXT: s_setpc_b64 s[30:31] 3844; 3845; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_3_3: 3846; GFX90A: ; %bb.0: 3847; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3848; GFX90A-NEXT: ;;#ASMSTART 3849; GFX90A-NEXT: ; def v[0:1] 3850; GFX90A-NEXT: ;;#ASMEND 3851; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3852; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3853; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3854; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3855; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3856; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 3857; GFX90A-NEXT: s_waitcnt vmcnt(0) 3858; GFX90A-NEXT: s_setpc_b64 s[30:31] 3859; 3860; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_3_3: 3861; GFX940: ; %bb.0: 3862; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3863; GFX940-NEXT: ;;#ASMSTART 3864; GFX940-NEXT: ; def v[0:1] 3865; GFX940-NEXT: ;;#ASMEND 3866; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3867; GFX940-NEXT: v_mov_b32_e32 v2, 0 3868; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3869; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3870; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3871; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 3872; GFX940-NEXT: s_waitcnt vmcnt(0) 3873; GFX940-NEXT: s_setpc_b64 s[30:31] 3874 %vec0 = call <4 x half> asm "; def $0", "=v"() 3875 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 3, i32 3> 3876 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3877 ret void 3878} 3879 3880define void @v_shuffle_v3f16_v4f16__2_3_3(ptr addrspace(1) inreg %ptr) { 3881; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_3_3: 3882; GFX900: ; %bb.0: 3883; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3884; GFX900-NEXT: v_mov_b32_e32 v2, 0 3885; GFX900-NEXT: ;;#ASMSTART 3886; GFX900-NEXT: ; def v[0:1] 3887; GFX900-NEXT: ;;#ASMEND 3888; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3889; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3890; GFX900-NEXT: s_waitcnt vmcnt(0) 3891; GFX900-NEXT: s_setpc_b64 s[30:31] 3892; 3893; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_3_3: 3894; GFX90A: ; %bb.0: 3895; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3896; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3897; GFX90A-NEXT: ;;#ASMSTART 3898; GFX90A-NEXT: ; def v[0:1] 3899; GFX90A-NEXT: ;;#ASMEND 3900; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 3901; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3902; GFX90A-NEXT: s_waitcnt vmcnt(0) 3903; GFX90A-NEXT: s_setpc_b64 s[30:31] 3904; 3905; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_3_3: 3906; GFX940: ; %bb.0: 3907; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3908; GFX940-NEXT: v_mov_b32_e32 v2, 0 3909; GFX940-NEXT: ;;#ASMSTART 3910; GFX940-NEXT: ; def v[0:1] 3911; GFX940-NEXT: ;;#ASMEND 3912; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 3913; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3914; GFX940-NEXT: s_waitcnt vmcnt(0) 3915; GFX940-NEXT: s_setpc_b64 s[30:31] 3916 %vec0 = call <4 x half> asm "; def $0", "=v"() 3917 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 3, i32 3> 3918 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3919 ret void 3920} 3921 3922define void @v_shuffle_v3f16_v4f16__3_3_3(ptr addrspace(1) inreg %ptr) { 3923; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_3_3: 3924; GFX900: ; %bb.0: 3925; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3926; GFX900-NEXT: ;;#ASMSTART 3927; GFX900-NEXT: ; def v[0:1] 3928; GFX900-NEXT: ;;#ASMEND 3929; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3930; GFX900-NEXT: v_mov_b32_e32 v2, 0 3931; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3932; GFX900-NEXT: v_perm_b32 v1, v1, v1, s4 3933; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3934; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 3935; GFX900-NEXT: s_waitcnt vmcnt(0) 3936; GFX900-NEXT: s_setpc_b64 s[30:31] 3937; 3938; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_3_3: 3939; GFX90A: ; %bb.0: 3940; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3941; GFX90A-NEXT: ;;#ASMSTART 3942; GFX90A-NEXT: ; def v[0:1] 3943; GFX90A-NEXT: ;;#ASMEND 3944; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3945; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3946; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3947; GFX90A-NEXT: v_perm_b32 v1, v1, v1, s4 3948; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3949; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 3950; GFX90A-NEXT: s_waitcnt vmcnt(0) 3951; GFX90A-NEXT: s_setpc_b64 s[30:31] 3952; 3953; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_3_3: 3954; GFX940: ; %bb.0: 3955; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3956; GFX940-NEXT: ;;#ASMSTART 3957; GFX940-NEXT: ; def v[0:1] 3958; GFX940-NEXT: ;;#ASMEND 3959; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3960; GFX940-NEXT: v_mov_b32_e32 v2, 0 3961; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 3962; GFX940-NEXT: v_perm_b32 v1, v1, v1, s2 3963; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3964; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 3965; GFX940-NEXT: s_waitcnt vmcnt(0) 3966; GFX940-NEXT: s_setpc_b64 s[30:31] 3967 %vec0 = call <4 x half> asm "; def $0", "=v"() 3968 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 3, i32 3> 3969 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 3970 ret void 3971} 3972 3973define void @v_shuffle_v3f16_v4f16__4_3_3(ptr addrspace(1) inreg %ptr) { 3974; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_3_3: 3975; GFX900: ; %bb.0: 3976; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3977; GFX900-NEXT: ;;#ASMSTART 3978; GFX900-NEXT: ; def v[0:1] 3979; GFX900-NEXT: ;;#ASMEND 3980; GFX900-NEXT: s_mov_b32 s4, 0xffff 3981; GFX900-NEXT: v_mov_b32_e32 v2, 0 3982; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 3983; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3984; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3985; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 3986; GFX900-NEXT: s_waitcnt vmcnt(0) 3987; GFX900-NEXT: s_setpc_b64 s[30:31] 3988; 3989; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_3_3: 3990; GFX90A: ; %bb.0: 3991; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3992; GFX90A-NEXT: ;;#ASMSTART 3993; GFX90A-NEXT: ; def v[0:1] 3994; GFX90A-NEXT: ;;#ASMEND 3995; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3996; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3997; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 3998; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 3999; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4000; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4001; GFX90A-NEXT: s_waitcnt vmcnt(0) 4002; GFX90A-NEXT: s_setpc_b64 s[30:31] 4003; 4004; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_3_3: 4005; GFX940: ; %bb.0: 4006; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4007; GFX940-NEXT: ;;#ASMSTART 4008; GFX940-NEXT: ; def v[0:1] 4009; GFX940-NEXT: ;;#ASMEND 4010; GFX940-NEXT: s_mov_b32 s2, 0xffff 4011; GFX940-NEXT: v_mov_b32_e32 v2, 0 4012; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 4013; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4014; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4015; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4016; GFX940-NEXT: s_waitcnt vmcnt(0) 4017; GFX940-NEXT: s_setpc_b64 s[30:31] 4018 %vec0 = call <4 x half> asm "; def $0", "=v"() 4019 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 3, i32 3> 4020 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4021 ret void 4022} 4023 4024define void @v_shuffle_v3f16_v4f16__5_3_3(ptr addrspace(1) inreg %ptr) { 4025; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_3_3: 4026; GFX900: ; %bb.0: 4027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4028; GFX900-NEXT: ;;#ASMSTART 4029; GFX900-NEXT: ; def v[0:1] 4030; GFX900-NEXT: ;;#ASMEND 4031; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4032; GFX900-NEXT: v_mov_b32_e32 v4, 0 4033; GFX900-NEXT: ;;#ASMSTART 4034; GFX900-NEXT: ; def v[2:3] 4035; GFX900-NEXT: ;;#ASMEND 4036; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 4037; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4038; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4039; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4040; GFX900-NEXT: s_waitcnt vmcnt(0) 4041; GFX900-NEXT: s_setpc_b64 s[30:31] 4042; 4043; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_3_3: 4044; GFX90A: ; %bb.0: 4045; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4046; GFX90A-NEXT: ;;#ASMSTART 4047; GFX90A-NEXT: ; def v[0:1] 4048; GFX90A-NEXT: ;;#ASMEND 4049; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4050; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4051; GFX90A-NEXT: ;;#ASMSTART 4052; GFX90A-NEXT: ; def v[2:3] 4053; GFX90A-NEXT: ;;#ASMEND 4054; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 4055; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4056; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4057; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4058; GFX90A-NEXT: s_waitcnt vmcnt(0) 4059; GFX90A-NEXT: s_setpc_b64 s[30:31] 4060; 4061; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_3_3: 4062; GFX940: ; %bb.0: 4063; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4064; GFX940-NEXT: ;;#ASMSTART 4065; GFX940-NEXT: ; def v[0:1] 4066; GFX940-NEXT: ;;#ASMEND 4067; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4068; GFX940-NEXT: v_mov_b32_e32 v4, 0 4069; GFX940-NEXT: ;;#ASMSTART 4070; GFX940-NEXT: ; def v[2:3] 4071; GFX940-NEXT: ;;#ASMEND 4072; GFX940-NEXT: s_nop 0 4073; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 4074; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4075; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4076; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4077; GFX940-NEXT: s_waitcnt vmcnt(0) 4078; GFX940-NEXT: s_setpc_b64 s[30:31] 4079 %vec0 = call <4 x half> asm "; def $0", "=v"() 4080 %vec1 = call <4 x half> asm "; def $0", "=v"() 4081 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 4082 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4083 ret void 4084} 4085 4086define void @v_shuffle_v3f16_v4f16__6_3_3(ptr addrspace(1) inreg %ptr) { 4087; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_3_3: 4088; GFX900: ; %bb.0: 4089; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4090; GFX900-NEXT: ;;#ASMSTART 4091; GFX900-NEXT: ; def v[0:1] 4092; GFX900-NEXT: ;;#ASMEND 4093; GFX900-NEXT: s_mov_b32 s4, 0xffff 4094; GFX900-NEXT: v_mov_b32_e32 v4, 0 4095; GFX900-NEXT: ;;#ASMSTART 4096; GFX900-NEXT: ; def v[2:3] 4097; GFX900-NEXT: ;;#ASMEND 4098; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 4099; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4100; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4101; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4102; GFX900-NEXT: s_waitcnt vmcnt(0) 4103; GFX900-NEXT: s_setpc_b64 s[30:31] 4104; 4105; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_3_3: 4106; GFX90A: ; %bb.0: 4107; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4108; GFX90A-NEXT: ;;#ASMSTART 4109; GFX90A-NEXT: ; def v[0:1] 4110; GFX90A-NEXT: ;;#ASMEND 4111; GFX90A-NEXT: s_mov_b32 s4, 0xffff 4112; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4113; GFX90A-NEXT: ;;#ASMSTART 4114; GFX90A-NEXT: ; def v[2:3] 4115; GFX90A-NEXT: ;;#ASMEND 4116; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 4117; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4118; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4119; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4120; GFX90A-NEXT: s_waitcnt vmcnt(0) 4121; GFX90A-NEXT: s_setpc_b64 s[30:31] 4122; 4123; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_3_3: 4124; GFX940: ; %bb.0: 4125; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4126; GFX940-NEXT: ;;#ASMSTART 4127; GFX940-NEXT: ; def v[0:1] 4128; GFX940-NEXT: ;;#ASMEND 4129; GFX940-NEXT: s_mov_b32 s2, 0xffff 4130; GFX940-NEXT: v_mov_b32_e32 v4, 0 4131; GFX940-NEXT: ;;#ASMSTART 4132; GFX940-NEXT: ; def v[2:3] 4133; GFX940-NEXT: ;;#ASMEND 4134; GFX940-NEXT: s_nop 0 4135; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 4136; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4137; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4138; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4139; GFX940-NEXT: s_waitcnt vmcnt(0) 4140; GFX940-NEXT: s_setpc_b64 s[30:31] 4141 %vec0 = call <4 x half> asm "; def $0", "=v"() 4142 %vec1 = call <4 x half> asm "; def $0", "=v"() 4143 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 3, i32 3> 4144 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4145 ret void 4146} 4147 4148define void @v_shuffle_v3f16_v4f16__7_3_3(ptr addrspace(1) inreg %ptr) { 4149; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_3: 4150; GFX900: ; %bb.0: 4151; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4152; GFX900-NEXT: ;;#ASMSTART 4153; GFX900-NEXT: ; def v[0:1] 4154; GFX900-NEXT: ;;#ASMEND 4155; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4156; GFX900-NEXT: v_mov_b32_e32 v4, 0 4157; GFX900-NEXT: ;;#ASMSTART 4158; GFX900-NEXT: ; def v[2:3] 4159; GFX900-NEXT: ;;#ASMEND 4160; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 4161; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4162; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4163; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 4164; GFX900-NEXT: s_waitcnt vmcnt(0) 4165; GFX900-NEXT: s_setpc_b64 s[30:31] 4166; 4167; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_3: 4168; GFX90A: ; %bb.0: 4169; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4170; GFX90A-NEXT: ;;#ASMSTART 4171; GFX90A-NEXT: ; def v[0:1] 4172; GFX90A-NEXT: ;;#ASMEND 4173; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4174; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4175; GFX90A-NEXT: ;;#ASMSTART 4176; GFX90A-NEXT: ; def v[2:3] 4177; GFX90A-NEXT: ;;#ASMEND 4178; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 4179; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4180; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4181; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 4182; GFX90A-NEXT: s_waitcnt vmcnt(0) 4183; GFX90A-NEXT: s_setpc_b64 s[30:31] 4184; 4185; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_3: 4186; GFX940: ; %bb.0: 4187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4188; GFX940-NEXT: ;;#ASMSTART 4189; GFX940-NEXT: ; def v[0:1] 4190; GFX940-NEXT: ;;#ASMEND 4191; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4192; GFX940-NEXT: v_mov_b32_e32 v4, 0 4193; GFX940-NEXT: ;;#ASMSTART 4194; GFX940-NEXT: ; def v[2:3] 4195; GFX940-NEXT: ;;#ASMEND 4196; GFX940-NEXT: s_nop 0 4197; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 4198; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 4199; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4200; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 4201; GFX940-NEXT: s_waitcnt vmcnt(0) 4202; GFX940-NEXT: s_setpc_b64 s[30:31] 4203 %vec0 = call <4 x half> asm "; def $0", "=v"() 4204 %vec1 = call <4 x half> asm "; def $0", "=v"() 4205 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 3> 4206 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4207 ret void 4208} 4209 4210define void @v_shuffle_v3f16_v4f16__7_u_3(ptr addrspace(1) inreg %ptr) { 4211; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_3: 4212; GFX900: ; %bb.0: 4213; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4214; GFX900-NEXT: v_mov_b32_e32 v4, 0 4215; GFX900-NEXT: ;;#ASMSTART 4216; GFX900-NEXT: ; def v[0:1] 4217; GFX900-NEXT: ;;#ASMEND 4218; GFX900-NEXT: ;;#ASMSTART 4219; GFX900-NEXT: ; def v[2:3] 4220; GFX900-NEXT: ;;#ASMEND 4221; GFX900-NEXT: v_alignbit_b32 v0, s4, v3, 16 4222; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4223; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4224; GFX900-NEXT: s_waitcnt vmcnt(0) 4225; GFX900-NEXT: s_setpc_b64 s[30:31] 4226; 4227; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_3: 4228; GFX90A: ; %bb.0: 4229; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4230; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4231; GFX90A-NEXT: ;;#ASMSTART 4232; GFX90A-NEXT: ; def v[0:1] 4233; GFX90A-NEXT: ;;#ASMEND 4234; GFX90A-NEXT: ;;#ASMSTART 4235; GFX90A-NEXT: ; def v[2:3] 4236; GFX90A-NEXT: ;;#ASMEND 4237; GFX90A-NEXT: v_alignbit_b32 v0, s4, v3, 16 4238; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4239; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4240; GFX90A-NEXT: s_waitcnt vmcnt(0) 4241; GFX90A-NEXT: s_setpc_b64 s[30:31] 4242; 4243; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_3: 4244; GFX940: ; %bb.0: 4245; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4246; GFX940-NEXT: v_mov_b32_e32 v4, 0 4247; GFX940-NEXT: ;;#ASMSTART 4248; GFX940-NEXT: ; def v[0:1] 4249; GFX940-NEXT: ;;#ASMEND 4250; GFX940-NEXT: ;;#ASMSTART 4251; GFX940-NEXT: ; def v[2:3] 4252; GFX940-NEXT: ;;#ASMEND 4253; GFX940-NEXT: s_nop 0 4254; GFX940-NEXT: v_alignbit_b32 v0, s0, v3, 16 4255; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4256; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4257; GFX940-NEXT: s_waitcnt vmcnt(0) 4258; GFX940-NEXT: s_setpc_b64 s[30:31] 4259 %vec0 = call <4 x half> asm "; def $0", "=v"() 4260 %vec1 = call <4 x half> asm "; def $0", "=v"() 4261 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 3> 4262 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4263 ret void 4264} 4265 4266define void @v_shuffle_v3f16_v4f16__7_0_3(ptr addrspace(1) inreg %ptr) { 4267; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_3: 4268; GFX900: ; %bb.0: 4269; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4270; GFX900-NEXT: v_mov_b32_e32 v4, 0 4271; GFX900-NEXT: ;;#ASMSTART 4272; GFX900-NEXT: ; def v[0:1] 4273; GFX900-NEXT: ;;#ASMEND 4274; GFX900-NEXT: ;;#ASMSTART 4275; GFX900-NEXT: ; def v[2:3] 4276; GFX900-NEXT: ;;#ASMEND 4277; GFX900-NEXT: v_alignbit_b32 v0, v0, v3, 16 4278; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4279; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4280; GFX900-NEXT: s_waitcnt vmcnt(0) 4281; GFX900-NEXT: s_setpc_b64 s[30:31] 4282; 4283; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_3: 4284; GFX90A: ; %bb.0: 4285; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4286; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4287; GFX90A-NEXT: ;;#ASMSTART 4288; GFX90A-NEXT: ; def v[0:1] 4289; GFX90A-NEXT: ;;#ASMEND 4290; GFX90A-NEXT: ;;#ASMSTART 4291; GFX90A-NEXT: ; def v[2:3] 4292; GFX90A-NEXT: ;;#ASMEND 4293; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 4294; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4295; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4296; GFX90A-NEXT: s_waitcnt vmcnt(0) 4297; GFX90A-NEXT: s_setpc_b64 s[30:31] 4298; 4299; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_3: 4300; GFX940: ; %bb.0: 4301; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4302; GFX940-NEXT: v_mov_b32_e32 v4, 0 4303; GFX940-NEXT: ;;#ASMSTART 4304; GFX940-NEXT: ; def v[0:1] 4305; GFX940-NEXT: ;;#ASMEND 4306; GFX940-NEXT: ;;#ASMSTART 4307; GFX940-NEXT: ; def v[2:3] 4308; GFX940-NEXT: ;;#ASMEND 4309; GFX940-NEXT: s_nop 0 4310; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 4311; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4312; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4313; GFX940-NEXT: s_waitcnt vmcnt(0) 4314; GFX940-NEXT: s_setpc_b64 s[30:31] 4315 %vec0 = call <4 x half> asm "; def $0", "=v"() 4316 %vec1 = call <4 x half> asm "; def $0", "=v"() 4317 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 3> 4318 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4319 ret void 4320} 4321 4322define void @v_shuffle_v3f16_v4f16__7_1_3(ptr addrspace(1) inreg %ptr) { 4323; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_3: 4324; GFX900: ; %bb.0: 4325; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4326; GFX900-NEXT: v_mov_b32_e32 v4, 0 4327; GFX900-NEXT: ;;#ASMSTART 4328; GFX900-NEXT: ; def v[0:1] 4329; GFX900-NEXT: ;;#ASMEND 4330; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4331; GFX900-NEXT: ;;#ASMSTART 4332; GFX900-NEXT: ; def v[2:3] 4333; GFX900-NEXT: ;;#ASMEND 4334; GFX900-NEXT: v_perm_b32 v0, v0, v3, s4 4335; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4336; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4337; GFX900-NEXT: s_waitcnt vmcnt(0) 4338; GFX900-NEXT: s_setpc_b64 s[30:31] 4339; 4340; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_3: 4341; GFX90A: ; %bb.0: 4342; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4343; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4344; GFX90A-NEXT: ;;#ASMSTART 4345; GFX90A-NEXT: ; def v[0:1] 4346; GFX90A-NEXT: ;;#ASMEND 4347; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4348; GFX90A-NEXT: ;;#ASMSTART 4349; GFX90A-NEXT: ; def v[2:3] 4350; GFX90A-NEXT: ;;#ASMEND 4351; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 4352; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4353; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4354; GFX90A-NEXT: s_waitcnt vmcnt(0) 4355; GFX90A-NEXT: s_setpc_b64 s[30:31] 4356; 4357; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_3: 4358; GFX940: ; %bb.0: 4359; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4360; GFX940-NEXT: v_mov_b32_e32 v4, 0 4361; GFX940-NEXT: ;;#ASMSTART 4362; GFX940-NEXT: ; def v[0:1] 4363; GFX940-NEXT: ;;#ASMEND 4364; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4365; GFX940-NEXT: ;;#ASMSTART 4366; GFX940-NEXT: ; def v[2:3] 4367; GFX940-NEXT: ;;#ASMEND 4368; GFX940-NEXT: s_nop 0 4369; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 4370; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4371; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4372; GFX940-NEXT: s_waitcnt vmcnt(0) 4373; GFX940-NEXT: s_setpc_b64 s[30:31] 4374 %vec0 = call <4 x half> asm "; def $0", "=v"() 4375 %vec1 = call <4 x half> asm "; def $0", "=v"() 4376 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 3> 4377 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4378 ret void 4379} 4380 4381define void @v_shuffle_v3f16_v4f16__7_2_3(ptr addrspace(1) inreg %ptr) { 4382; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_3: 4383; GFX900: ; %bb.0: 4384; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4385; GFX900-NEXT: v_mov_b32_e32 v4, 0 4386; GFX900-NEXT: ;;#ASMSTART 4387; GFX900-NEXT: ; def v[0:1] 4388; GFX900-NEXT: ;;#ASMEND 4389; GFX900-NEXT: ;;#ASMSTART 4390; GFX900-NEXT: ; def v[2:3] 4391; GFX900-NEXT: ;;#ASMEND 4392; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 4393; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4394; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4395; GFX900-NEXT: s_waitcnt vmcnt(0) 4396; GFX900-NEXT: s_setpc_b64 s[30:31] 4397; 4398; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_3: 4399; GFX90A: ; %bb.0: 4400; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4401; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4402; GFX90A-NEXT: ;;#ASMSTART 4403; GFX90A-NEXT: ; def v[0:1] 4404; GFX90A-NEXT: ;;#ASMEND 4405; GFX90A-NEXT: ;;#ASMSTART 4406; GFX90A-NEXT: ; def v[2:3] 4407; GFX90A-NEXT: ;;#ASMEND 4408; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 4409; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4410; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4411; GFX90A-NEXT: s_waitcnt vmcnt(0) 4412; GFX90A-NEXT: s_setpc_b64 s[30:31] 4413; 4414; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_3: 4415; GFX940: ; %bb.0: 4416; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4417; GFX940-NEXT: v_mov_b32_e32 v4, 0 4418; GFX940-NEXT: ;;#ASMSTART 4419; GFX940-NEXT: ; def v[0:1] 4420; GFX940-NEXT: ;;#ASMEND 4421; GFX940-NEXT: ;;#ASMSTART 4422; GFX940-NEXT: ; def v[2:3] 4423; GFX940-NEXT: ;;#ASMEND 4424; GFX940-NEXT: s_nop 0 4425; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 4426; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4427; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4428; GFX940-NEXT: s_waitcnt vmcnt(0) 4429; GFX940-NEXT: s_setpc_b64 s[30:31] 4430 %vec0 = call <4 x half> asm "; def $0", "=v"() 4431 %vec1 = call <4 x half> asm "; def $0", "=v"() 4432 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 3> 4433 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4434 ret void 4435} 4436 4437define void @v_shuffle_v3f16_v4f16__7_4_3(ptr addrspace(1) inreg %ptr) { 4438; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_3: 4439; GFX900: ; %bb.0: 4440; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4441; GFX900-NEXT: v_mov_b32_e32 v4, 0 4442; GFX900-NEXT: ;;#ASMSTART 4443; GFX900-NEXT: ; def v[0:1] 4444; GFX900-NEXT: ;;#ASMEND 4445; GFX900-NEXT: ;;#ASMSTART 4446; GFX900-NEXT: ; def v[2:3] 4447; GFX900-NEXT: ;;#ASMEND 4448; GFX900-NEXT: v_alignbit_b32 v0, v2, v3, 16 4449; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4450; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4451; GFX900-NEXT: s_waitcnt vmcnt(0) 4452; GFX900-NEXT: s_setpc_b64 s[30:31] 4453; 4454; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_3: 4455; GFX90A: ; %bb.0: 4456; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4457; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4458; GFX90A-NEXT: ;;#ASMSTART 4459; GFX90A-NEXT: ; def v[0:1] 4460; GFX90A-NEXT: ;;#ASMEND 4461; GFX90A-NEXT: ;;#ASMSTART 4462; GFX90A-NEXT: ; def v[2:3] 4463; GFX90A-NEXT: ;;#ASMEND 4464; GFX90A-NEXT: v_alignbit_b32 v0, v2, v3, 16 4465; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4466; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4467; GFX90A-NEXT: s_waitcnt vmcnt(0) 4468; GFX90A-NEXT: s_setpc_b64 s[30:31] 4469; 4470; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_3: 4471; GFX940: ; %bb.0: 4472; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4473; GFX940-NEXT: v_mov_b32_e32 v4, 0 4474; GFX940-NEXT: ;;#ASMSTART 4475; GFX940-NEXT: ; def v[0:1] 4476; GFX940-NEXT: ;;#ASMEND 4477; GFX940-NEXT: ;;#ASMSTART 4478; GFX940-NEXT: ; def v[2:3] 4479; GFX940-NEXT: ;;#ASMEND 4480; GFX940-NEXT: s_nop 0 4481; GFX940-NEXT: v_alignbit_b32 v0, v2, v3, 16 4482; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4483; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4484; GFX940-NEXT: s_waitcnt vmcnt(0) 4485; GFX940-NEXT: s_setpc_b64 s[30:31] 4486 %vec0 = call <4 x half> asm "; def $0", "=v"() 4487 %vec1 = call <4 x half> asm "; def $0", "=v"() 4488 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 3> 4489 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4490 ret void 4491} 4492 4493define void @v_shuffle_v3f16_v4f16__7_5_3(ptr addrspace(1) inreg %ptr) { 4494; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_3: 4495; GFX900: ; %bb.0: 4496; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4497; GFX900-NEXT: v_mov_b32_e32 v4, 0 4498; GFX900-NEXT: ;;#ASMSTART 4499; GFX900-NEXT: ; def v[0:1] 4500; GFX900-NEXT: ;;#ASMEND 4501; GFX900-NEXT: s_mov_b32 s4, 0x7060302 4502; GFX900-NEXT: ;;#ASMSTART 4503; GFX900-NEXT: ; def v[2:3] 4504; GFX900-NEXT: ;;#ASMEND 4505; GFX900-NEXT: v_perm_b32 v0, v2, v3, s4 4506; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4507; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4508; GFX900-NEXT: s_waitcnt vmcnt(0) 4509; GFX900-NEXT: s_setpc_b64 s[30:31] 4510; 4511; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_3: 4512; GFX90A: ; %bb.0: 4513; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4514; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4515; GFX90A-NEXT: ;;#ASMSTART 4516; GFX90A-NEXT: ; def v[0:1] 4517; GFX90A-NEXT: ;;#ASMEND 4518; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 4519; GFX90A-NEXT: ;;#ASMSTART 4520; GFX90A-NEXT: ; def v[2:3] 4521; GFX90A-NEXT: ;;#ASMEND 4522; GFX90A-NEXT: v_perm_b32 v0, v2, v3, s4 4523; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4524; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4525; GFX90A-NEXT: s_waitcnt vmcnt(0) 4526; GFX90A-NEXT: s_setpc_b64 s[30:31] 4527; 4528; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_3: 4529; GFX940: ; %bb.0: 4530; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4531; GFX940-NEXT: v_mov_b32_e32 v4, 0 4532; GFX940-NEXT: ;;#ASMSTART 4533; GFX940-NEXT: ; def v[0:1] 4534; GFX940-NEXT: ;;#ASMEND 4535; GFX940-NEXT: s_mov_b32 s2, 0x7060302 4536; GFX940-NEXT: ;;#ASMSTART 4537; GFX940-NEXT: ; def v[2:3] 4538; GFX940-NEXT: ;;#ASMEND 4539; GFX940-NEXT: s_nop 0 4540; GFX940-NEXT: v_perm_b32 v0, v2, v3, s2 4541; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4542; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4543; GFX940-NEXT: s_waitcnt vmcnt(0) 4544; GFX940-NEXT: s_setpc_b64 s[30:31] 4545 %vec0 = call <4 x half> asm "; def $0", "=v"() 4546 %vec1 = call <4 x half> asm "; def $0", "=v"() 4547 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 3> 4548 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4549 ret void 4550} 4551 4552define void @v_shuffle_v3f16_v4f16__7_6_3(ptr addrspace(1) inreg %ptr) { 4553; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_3: 4554; GFX900: ; %bb.0: 4555; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4556; GFX900-NEXT: v_mov_b32_e32 v4, 0 4557; GFX900-NEXT: ;;#ASMSTART 4558; GFX900-NEXT: ; def v[0:1] 4559; GFX900-NEXT: ;;#ASMEND 4560; GFX900-NEXT: ;;#ASMSTART 4561; GFX900-NEXT: ; def v[2:3] 4562; GFX900-NEXT: ;;#ASMEND 4563; GFX900-NEXT: v_alignbit_b32 v0, v3, v3, 16 4564; GFX900-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4565; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 4566; GFX900-NEXT: s_waitcnt vmcnt(0) 4567; GFX900-NEXT: s_setpc_b64 s[30:31] 4568; 4569; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_3: 4570; GFX90A: ; %bb.0: 4571; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4572; GFX90A-NEXT: v_mov_b32_e32 v4, 0 4573; GFX90A-NEXT: ;;#ASMSTART 4574; GFX90A-NEXT: ; def v[0:1] 4575; GFX90A-NEXT: ;;#ASMEND 4576; GFX90A-NEXT: ;;#ASMSTART 4577; GFX90A-NEXT: ; def v[2:3] 4578; GFX90A-NEXT: ;;#ASMEND 4579; GFX90A-NEXT: v_alignbit_b32 v0, v3, v3, 16 4580; GFX90A-NEXT: global_store_short_d16_hi v4, v1, s[16:17] offset:4 4581; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 4582; GFX90A-NEXT: s_waitcnt vmcnt(0) 4583; GFX90A-NEXT: s_setpc_b64 s[30:31] 4584; 4585; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_3: 4586; GFX940: ; %bb.0: 4587; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4588; GFX940-NEXT: v_mov_b32_e32 v4, 0 4589; GFX940-NEXT: ;;#ASMSTART 4590; GFX940-NEXT: ; def v[0:1] 4591; GFX940-NEXT: ;;#ASMEND 4592; GFX940-NEXT: ;;#ASMSTART 4593; GFX940-NEXT: ; def v[2:3] 4594; GFX940-NEXT: ;;#ASMEND 4595; GFX940-NEXT: s_nop 0 4596; GFX940-NEXT: v_alignbit_b32 v0, v3, v3, 16 4597; GFX940-NEXT: global_store_short_d16_hi v4, v1, s[0:1] offset:4 sc0 sc1 4598; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 4599; GFX940-NEXT: s_waitcnt vmcnt(0) 4600; GFX940-NEXT: s_setpc_b64 s[30:31] 4601 %vec0 = call <4 x half> asm "; def $0", "=v"() 4602 %vec1 = call <4 x half> asm "; def $0", "=v"() 4603 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 3> 4604 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4605 ret void 4606} 4607 4608define void @v_shuffle_v3f16_v4f16__u_4_4(ptr addrspace(1) inreg %ptr) { 4609; GFX9-LABEL: v_shuffle_v3f16_v4f16__u_4_4: 4610; GFX9: ; %bb.0: 4611; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4612; GFX9-NEXT: s_setpc_b64 s[30:31] 4613 %vec0 = call <4 x half> asm "; def $0", "=v"() 4614 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 4, i32 4> 4615 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4616 ret void 4617} 4618 4619define void @v_shuffle_v3f16_v4f16__0_4_4(ptr addrspace(1) inreg %ptr) { 4620; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_4_4: 4621; GFX900: ; %bb.0: 4622; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4623; GFX900-NEXT: v_mov_b32_e32 v2, 0 4624; GFX900-NEXT: ;;#ASMSTART 4625; GFX900-NEXT: ; def v[0:1] 4626; GFX900-NEXT: ;;#ASMEND 4627; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 4628; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4629; GFX900-NEXT: s_waitcnt vmcnt(0) 4630; GFX900-NEXT: s_setpc_b64 s[30:31] 4631; 4632; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_4_4: 4633; GFX90A: ; %bb.0: 4634; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4635; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4636; GFX90A-NEXT: ;;#ASMSTART 4637; GFX90A-NEXT: ; def v[0:1] 4638; GFX90A-NEXT: ;;#ASMEND 4639; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 4640; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4641; GFX90A-NEXT: s_waitcnt vmcnt(0) 4642; GFX90A-NEXT: s_setpc_b64 s[30:31] 4643; 4644; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_4_4: 4645; GFX940: ; %bb.0: 4646; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4647; GFX940-NEXT: v_mov_b32_e32 v2, 0 4648; GFX940-NEXT: ;;#ASMSTART 4649; GFX940-NEXT: ; def v[0:1] 4650; GFX940-NEXT: ;;#ASMEND 4651; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 4652; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4653; GFX940-NEXT: s_waitcnt vmcnt(0) 4654; GFX940-NEXT: s_setpc_b64 s[30:31] 4655 %vec0 = call <4 x half> asm "; def $0", "=v"() 4656 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 4, i32 4> 4657 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4658 ret void 4659} 4660 4661define void @v_shuffle_v3f16_v4f16__1_4_4(ptr addrspace(1) inreg %ptr) { 4662; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_4_4: 4663; GFX900: ; %bb.0: 4664; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4665; GFX900-NEXT: ;;#ASMSTART 4666; GFX900-NEXT: ; def v[0:1] 4667; GFX900-NEXT: ;;#ASMEND 4668; GFX900-NEXT: v_mov_b32_e32 v2, 0 4669; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 4670; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4671; GFX900-NEXT: s_waitcnt vmcnt(0) 4672; GFX900-NEXT: s_setpc_b64 s[30:31] 4673; 4674; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_4_4: 4675; GFX90A: ; %bb.0: 4676; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4677; GFX90A-NEXT: ;;#ASMSTART 4678; GFX90A-NEXT: ; def v[0:1] 4679; GFX90A-NEXT: ;;#ASMEND 4680; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4681; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 4682; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4683; GFX90A-NEXT: s_waitcnt vmcnt(0) 4684; GFX90A-NEXT: s_setpc_b64 s[30:31] 4685; 4686; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_4_4: 4687; GFX940: ; %bb.0: 4688; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4689; GFX940-NEXT: ;;#ASMSTART 4690; GFX940-NEXT: ; def v[0:1] 4691; GFX940-NEXT: ;;#ASMEND 4692; GFX940-NEXT: v_mov_b32_e32 v2, 0 4693; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 4694; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4695; GFX940-NEXT: s_waitcnt vmcnt(0) 4696; GFX940-NEXT: s_setpc_b64 s[30:31] 4697 %vec0 = call <4 x half> asm "; def $0", "=v"() 4698 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 4, i32 4> 4699 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4700 ret void 4701} 4702 4703define void @v_shuffle_v3f16_v4f16__2_4_4(ptr addrspace(1) inreg %ptr) { 4704; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_4_4: 4705; GFX900: ; %bb.0: 4706; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4707; GFX900-NEXT: v_mov_b32_e32 v2, 0 4708; GFX900-NEXT: ;;#ASMSTART 4709; GFX900-NEXT: ; def v[0:1] 4710; GFX900-NEXT: ;;#ASMEND 4711; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4712; GFX900-NEXT: s_waitcnt vmcnt(0) 4713; GFX900-NEXT: s_setpc_b64 s[30:31] 4714; 4715; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_4_4: 4716; GFX90A: ; %bb.0: 4717; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4718; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4719; GFX90A-NEXT: ;;#ASMSTART 4720; GFX90A-NEXT: ; def v[0:1] 4721; GFX90A-NEXT: ;;#ASMEND 4722; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4723; GFX90A-NEXT: s_waitcnt vmcnt(0) 4724; GFX90A-NEXT: s_setpc_b64 s[30:31] 4725; 4726; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_4_4: 4727; GFX940: ; %bb.0: 4728; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4729; GFX940-NEXT: v_mov_b32_e32 v2, 0 4730; GFX940-NEXT: ;;#ASMSTART 4731; GFX940-NEXT: ; def v[0:1] 4732; GFX940-NEXT: ;;#ASMEND 4733; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4734; GFX940-NEXT: s_waitcnt vmcnt(0) 4735; GFX940-NEXT: s_setpc_b64 s[30:31] 4736 %vec0 = call <4 x half> asm "; def $0", "=v"() 4737 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 4, i32 4> 4738 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4739 ret void 4740} 4741 4742define void @v_shuffle_v3f16_v4f16__3_4_4(ptr addrspace(1) inreg %ptr) { 4743; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_4_4: 4744; GFX900: ; %bb.0: 4745; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4746; GFX900-NEXT: ;;#ASMSTART 4747; GFX900-NEXT: ; def v[0:1] 4748; GFX900-NEXT: ;;#ASMEND 4749; GFX900-NEXT: v_mov_b32_e32 v2, 0 4750; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 4751; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 4752; GFX900-NEXT: s_waitcnt vmcnt(0) 4753; GFX900-NEXT: s_setpc_b64 s[30:31] 4754; 4755; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_4_4: 4756; GFX90A: ; %bb.0: 4757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4758; GFX90A-NEXT: ;;#ASMSTART 4759; GFX90A-NEXT: ; def v[0:1] 4760; GFX90A-NEXT: ;;#ASMEND 4761; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4762; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 4763; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 4764; GFX90A-NEXT: s_waitcnt vmcnt(0) 4765; GFX90A-NEXT: s_setpc_b64 s[30:31] 4766; 4767; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_4_4: 4768; GFX940: ; %bb.0: 4769; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4770; GFX940-NEXT: ;;#ASMSTART 4771; GFX940-NEXT: ; def v[0:1] 4772; GFX940-NEXT: ;;#ASMEND 4773; GFX940-NEXT: v_mov_b32_e32 v2, 0 4774; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 4775; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 4776; GFX940-NEXT: s_waitcnt vmcnt(0) 4777; GFX940-NEXT: s_setpc_b64 s[30:31] 4778 %vec0 = call <4 x half> asm "; def $0", "=v"() 4779 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 4, i32 4> 4780 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4781 ret void 4782} 4783 4784define void @v_shuffle_v3f16_v4f16__4_4_4(ptr addrspace(1) inreg %ptr) { 4785; GFX9-LABEL: v_shuffle_v3f16_v4f16__4_4_4: 4786; GFX9: ; %bb.0: 4787; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4788; GFX9-NEXT: s_setpc_b64 s[30:31] 4789 %vec0 = call <4 x half> asm "; def $0", "=v"() 4790 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 4, i32 4> 4791 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4792 ret void 4793} 4794 4795define void @v_shuffle_v3f16_v4f16__5_4_4(ptr addrspace(1) inreg %ptr) { 4796; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_4_4: 4797; GFX900: ; %bb.0: 4798; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4799; GFX900-NEXT: v_mov_b32_e32 v2, 0 4800; GFX900-NEXT: ;;#ASMSTART 4801; GFX900-NEXT: ; def v[0:1] 4802; GFX900-NEXT: ;;#ASMEND 4803; GFX900-NEXT: v_alignbit_b32 v1, v0, v0, 16 4804; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 4805; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4806; GFX900-NEXT: s_waitcnt vmcnt(0) 4807; GFX900-NEXT: s_setpc_b64 s[30:31] 4808; 4809; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_4_4: 4810; GFX90A: ; %bb.0: 4811; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4812; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4813; GFX90A-NEXT: ;;#ASMSTART 4814; GFX90A-NEXT: ; def v[0:1] 4815; GFX90A-NEXT: ;;#ASMEND 4816; GFX90A-NEXT: v_alignbit_b32 v1, v0, v0, 16 4817; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 4818; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4819; GFX90A-NEXT: s_waitcnt vmcnt(0) 4820; GFX90A-NEXT: s_setpc_b64 s[30:31] 4821; 4822; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_4_4: 4823; GFX940: ; %bb.0: 4824; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4825; GFX940-NEXT: v_mov_b32_e32 v2, 0 4826; GFX940-NEXT: ;;#ASMSTART 4827; GFX940-NEXT: ; def v[0:1] 4828; GFX940-NEXT: ;;#ASMEND 4829; GFX940-NEXT: s_nop 0 4830; GFX940-NEXT: v_alignbit_b32 v1, v0, v0, 16 4831; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 4832; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4833; GFX940-NEXT: s_waitcnt vmcnt(0) 4834; GFX940-NEXT: s_setpc_b64 s[30:31] 4835 %vec0 = call <4 x half> asm "; def $0", "=v"() 4836 %vec1 = call <4 x half> asm "; def $0", "=v"() 4837 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 4838 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4839 ret void 4840} 4841 4842define void @v_shuffle_v3f16_v4f16__6_4_4(ptr addrspace(1) inreg %ptr) { 4843; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_4_4: 4844; GFX900: ; %bb.0: 4845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4846; GFX900-NEXT: v_mov_b32_e32 v2, 0 4847; GFX900-NEXT: ;;#ASMSTART 4848; GFX900-NEXT: ; def v[0:1] 4849; GFX900-NEXT: ;;#ASMEND 4850; GFX900-NEXT: s_mov_b32 s4, 0x5040100 4851; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 4852; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 4853; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4854; GFX900-NEXT: s_waitcnt vmcnt(0) 4855; GFX900-NEXT: s_setpc_b64 s[30:31] 4856; 4857; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_4_4: 4858; GFX90A: ; %bb.0: 4859; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4860; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4861; GFX90A-NEXT: ;;#ASMSTART 4862; GFX90A-NEXT: ; def v[0:1] 4863; GFX90A-NEXT: ;;#ASMEND 4864; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 4865; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 4866; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 4867; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4868; GFX90A-NEXT: s_waitcnt vmcnt(0) 4869; GFX90A-NEXT: s_setpc_b64 s[30:31] 4870; 4871; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_4_4: 4872; GFX940: ; %bb.0: 4873; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4874; GFX940-NEXT: v_mov_b32_e32 v2, 0 4875; GFX940-NEXT: ;;#ASMSTART 4876; GFX940-NEXT: ; def v[0:1] 4877; GFX940-NEXT: ;;#ASMEND 4878; GFX940-NEXT: s_mov_b32 s2, 0x5040100 4879; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 4880; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 4881; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4882; GFX940-NEXT: s_waitcnt vmcnt(0) 4883; GFX940-NEXT: s_setpc_b64 s[30:31] 4884 %vec0 = call <4 x half> asm "; def $0", "=v"() 4885 %vec1 = call <4 x half> asm "; def $0", "=v"() 4886 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 4, i32 4> 4887 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4888 ret void 4889} 4890 4891define void @v_shuffle_v3f16_v4f16__7_4_4(ptr addrspace(1) inreg %ptr) { 4892; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_4: 4893; GFX900: ; %bb.0: 4894; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4895; GFX900-NEXT: v_mov_b32_e32 v2, 0 4896; GFX900-NEXT: ;;#ASMSTART 4897; GFX900-NEXT: ; def v[0:1] 4898; GFX900-NEXT: ;;#ASMEND 4899; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 4900; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 4901; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4902; GFX900-NEXT: s_waitcnt vmcnt(0) 4903; GFX900-NEXT: s_setpc_b64 s[30:31] 4904; 4905; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_4: 4906; GFX90A: ; %bb.0: 4907; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4908; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4909; GFX90A-NEXT: ;;#ASMSTART 4910; GFX90A-NEXT: ; def v[0:1] 4911; GFX90A-NEXT: ;;#ASMEND 4912; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 4913; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 4914; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4915; GFX90A-NEXT: s_waitcnt vmcnt(0) 4916; GFX90A-NEXT: s_setpc_b64 s[30:31] 4917; 4918; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_4: 4919; GFX940: ; %bb.0: 4920; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4921; GFX940-NEXT: v_mov_b32_e32 v2, 0 4922; GFX940-NEXT: ;;#ASMSTART 4923; GFX940-NEXT: ; def v[0:1] 4924; GFX940-NEXT: ;;#ASMEND 4925; GFX940-NEXT: s_nop 0 4926; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 4927; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 4928; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4929; GFX940-NEXT: s_waitcnt vmcnt(0) 4930; GFX940-NEXT: s_setpc_b64 s[30:31] 4931 %vec0 = call <4 x half> asm "; def $0", "=v"() 4932 %vec1 = call <4 x half> asm "; def $0", "=v"() 4933 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 4> 4934 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4935 ret void 4936} 4937 4938define void @v_shuffle_v3f16_v4f16__7_u_4(ptr addrspace(1) inreg %ptr) { 4939; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_4: 4940; GFX900: ; %bb.0: 4941; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4942; GFX900-NEXT: v_mov_b32_e32 v2, 0 4943; GFX900-NEXT: ;;#ASMSTART 4944; GFX900-NEXT: ; def v[0:1] 4945; GFX900-NEXT: ;;#ASMEND 4946; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 4947; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 4948; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 4949; GFX900-NEXT: s_waitcnt vmcnt(0) 4950; GFX900-NEXT: s_setpc_b64 s[30:31] 4951; 4952; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_4: 4953; GFX90A: ; %bb.0: 4954; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4955; GFX90A-NEXT: v_mov_b32_e32 v2, 0 4956; GFX90A-NEXT: ;;#ASMSTART 4957; GFX90A-NEXT: ; def v[0:1] 4958; GFX90A-NEXT: ;;#ASMEND 4959; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16 4960; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 4961; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 4962; GFX90A-NEXT: s_waitcnt vmcnt(0) 4963; GFX90A-NEXT: s_setpc_b64 s[30:31] 4964; 4965; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_4: 4966; GFX940: ; %bb.0: 4967; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4968; GFX940-NEXT: v_mov_b32_e32 v2, 0 4969; GFX940-NEXT: ;;#ASMSTART 4970; GFX940-NEXT: ; def v[0:1] 4971; GFX940-NEXT: ;;#ASMEND 4972; GFX940-NEXT: s_nop 0 4973; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 4974; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 4975; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 4976; GFX940-NEXT: s_waitcnt vmcnt(0) 4977; GFX940-NEXT: s_setpc_b64 s[30:31] 4978 %vec0 = call <4 x half> asm "; def $0", "=v"() 4979 %vec1 = call <4 x half> asm "; def $0", "=v"() 4980 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 4> 4981 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 4982 ret void 4983} 4984 4985define void @v_shuffle_v3f16_v4f16__7_0_4(ptr addrspace(1) inreg %ptr) { 4986; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_4: 4987; GFX900: ; %bb.0: 4988; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4989; GFX900-NEXT: ;;#ASMSTART 4990; GFX900-NEXT: ; def v[0:1] 4991; GFX900-NEXT: ;;#ASMEND 4992; GFX900-NEXT: v_mov_b32_e32 v3, 0 4993; GFX900-NEXT: ;;#ASMSTART 4994; GFX900-NEXT: ; def v[1:2] 4995; GFX900-NEXT: ;;#ASMEND 4996; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 4997; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 4998; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 4999; GFX900-NEXT: s_waitcnt vmcnt(0) 5000; GFX900-NEXT: s_setpc_b64 s[30:31] 5001; 5002; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_4: 5003; GFX90A: ; %bb.0: 5004; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5005; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5006; GFX90A-NEXT: ;;#ASMSTART 5007; GFX90A-NEXT: ; def v[0:1] 5008; GFX90A-NEXT: ;;#ASMEND 5009; GFX90A-NEXT: ;;#ASMSTART 5010; GFX90A-NEXT: ; def v[2:3] 5011; GFX90A-NEXT: ;;#ASMEND 5012; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 5013; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 5014; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5015; GFX90A-NEXT: s_waitcnt vmcnt(0) 5016; GFX90A-NEXT: s_setpc_b64 s[30:31] 5017; 5018; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_4: 5019; GFX940: ; %bb.0: 5020; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5021; GFX940-NEXT: v_mov_b32_e32 v4, 0 5022; GFX940-NEXT: ;;#ASMSTART 5023; GFX940-NEXT: ; def v[0:1] 5024; GFX940-NEXT: ;;#ASMEND 5025; GFX940-NEXT: ;;#ASMSTART 5026; GFX940-NEXT: ; def v[2:3] 5027; GFX940-NEXT: ;;#ASMEND 5028; GFX940-NEXT: s_nop 0 5029; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 5030; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 5031; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5032; GFX940-NEXT: s_waitcnt vmcnt(0) 5033; GFX940-NEXT: s_setpc_b64 s[30:31] 5034 %vec0 = call <4 x half> asm "; def $0", "=v"() 5035 %vec1 = call <4 x half> asm "; def $0", "=v"() 5036 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 4> 5037 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5038 ret void 5039} 5040 5041define void @v_shuffle_v3f16_v4f16__7_1_4(ptr addrspace(1) inreg %ptr) { 5042; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_4: 5043; GFX900: ; %bb.0: 5044; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5045; GFX900-NEXT: ;;#ASMSTART 5046; GFX900-NEXT: ; def v[0:1] 5047; GFX900-NEXT: ;;#ASMEND 5048; GFX900-NEXT: v_mov_b32_e32 v3, 0 5049; GFX900-NEXT: ;;#ASMSTART 5050; GFX900-NEXT: ; def v[1:2] 5051; GFX900-NEXT: ;;#ASMEND 5052; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5053; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 5054; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 5055; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5056; GFX900-NEXT: s_waitcnt vmcnt(0) 5057; GFX900-NEXT: s_setpc_b64 s[30:31] 5058; 5059; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_4: 5060; GFX90A: ; %bb.0: 5061; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5062; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5063; GFX90A-NEXT: ;;#ASMSTART 5064; GFX90A-NEXT: ; def v[0:1] 5065; GFX90A-NEXT: ;;#ASMEND 5066; GFX90A-NEXT: ;;#ASMSTART 5067; GFX90A-NEXT: ; def v[2:3] 5068; GFX90A-NEXT: ;;#ASMEND 5069; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5070; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 5071; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 5072; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5073; GFX90A-NEXT: s_waitcnt vmcnt(0) 5074; GFX90A-NEXT: s_setpc_b64 s[30:31] 5075; 5076; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_4: 5077; GFX940: ; %bb.0: 5078; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5079; GFX940-NEXT: v_mov_b32_e32 v4, 0 5080; GFX940-NEXT: ;;#ASMSTART 5081; GFX940-NEXT: ; def v[0:1] 5082; GFX940-NEXT: ;;#ASMEND 5083; GFX940-NEXT: ;;#ASMSTART 5084; GFX940-NEXT: ; def v[2:3] 5085; GFX940-NEXT: ;;#ASMEND 5086; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5087; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 5088; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 5089; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5090; GFX940-NEXT: s_waitcnt vmcnt(0) 5091; GFX940-NEXT: s_setpc_b64 s[30:31] 5092 %vec0 = call <4 x half> asm "; def $0", "=v"() 5093 %vec1 = call <4 x half> asm "; def $0", "=v"() 5094 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 4> 5095 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5096 ret void 5097} 5098 5099define void @v_shuffle_v3f16_v4f16__7_2_4(ptr addrspace(1) inreg %ptr) { 5100; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_4: 5101; GFX900: ; %bb.0: 5102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5103; GFX900-NEXT: v_mov_b32_e32 v4, 0 5104; GFX900-NEXT: ;;#ASMSTART 5105; GFX900-NEXT: ; def v[0:1] 5106; GFX900-NEXT: ;;#ASMEND 5107; GFX900-NEXT: ;;#ASMSTART 5108; GFX900-NEXT: ; def v[2:3] 5109; GFX900-NEXT: ;;#ASMEND 5110; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 5111; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4 5112; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5113; GFX900-NEXT: s_waitcnt vmcnt(0) 5114; GFX900-NEXT: s_setpc_b64 s[30:31] 5115; 5116; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_4: 5117; GFX90A: ; %bb.0: 5118; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5119; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5120; GFX90A-NEXT: ;;#ASMSTART 5121; GFX90A-NEXT: ; def v[0:1] 5122; GFX90A-NEXT: ;;#ASMEND 5123; GFX90A-NEXT: ;;#ASMSTART 5124; GFX90A-NEXT: ; def v[2:3] 5125; GFX90A-NEXT: ;;#ASMEND 5126; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 5127; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 5128; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5129; GFX90A-NEXT: s_waitcnt vmcnt(0) 5130; GFX90A-NEXT: s_setpc_b64 s[30:31] 5131; 5132; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_4: 5133; GFX940: ; %bb.0: 5134; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5135; GFX940-NEXT: v_mov_b32_e32 v4, 0 5136; GFX940-NEXT: ;;#ASMSTART 5137; GFX940-NEXT: ; def v[0:1] 5138; GFX940-NEXT: ;;#ASMEND 5139; GFX940-NEXT: ;;#ASMSTART 5140; GFX940-NEXT: ; def v[2:3] 5141; GFX940-NEXT: ;;#ASMEND 5142; GFX940-NEXT: s_nop 0 5143; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 5144; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 5145; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5146; GFX940-NEXT: s_waitcnt vmcnt(0) 5147; GFX940-NEXT: s_setpc_b64 s[30:31] 5148 %vec0 = call <4 x half> asm "; def $0", "=v"() 5149 %vec1 = call <4 x half> asm "; def $0", "=v"() 5150 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 4> 5151 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5152 ret void 5153} 5154 5155define void @v_shuffle_v3f16_v4f16__7_3_4(ptr addrspace(1) inreg %ptr) { 5156; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_4: 5157; GFX900: ; %bb.0: 5158; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5159; GFX900-NEXT: v_mov_b32_e32 v4, 0 5160; GFX900-NEXT: ;;#ASMSTART 5161; GFX900-NEXT: ; def v[0:1] 5162; GFX900-NEXT: ;;#ASMEND 5163; GFX900-NEXT: ;;#ASMSTART 5164; GFX900-NEXT: ; def v[2:3] 5165; GFX900-NEXT: ;;#ASMEND 5166; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5167; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 5168; GFX900-NEXT: global_store_short v4, v2, s[16:17] offset:4 5169; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5170; GFX900-NEXT: s_waitcnt vmcnt(0) 5171; GFX900-NEXT: s_setpc_b64 s[30:31] 5172; 5173; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_4: 5174; GFX90A: ; %bb.0: 5175; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5176; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5177; GFX90A-NEXT: ;;#ASMSTART 5178; GFX90A-NEXT: ; def v[0:1] 5179; GFX90A-NEXT: ;;#ASMEND 5180; GFX90A-NEXT: ;;#ASMSTART 5181; GFX90A-NEXT: ; def v[2:3] 5182; GFX90A-NEXT: ;;#ASMEND 5183; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5184; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 5185; GFX90A-NEXT: global_store_short v4, v2, s[16:17] offset:4 5186; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5187; GFX90A-NEXT: s_waitcnt vmcnt(0) 5188; GFX90A-NEXT: s_setpc_b64 s[30:31] 5189; 5190; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_4: 5191; GFX940: ; %bb.0: 5192; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5193; GFX940-NEXT: v_mov_b32_e32 v4, 0 5194; GFX940-NEXT: ;;#ASMSTART 5195; GFX940-NEXT: ; def v[0:1] 5196; GFX940-NEXT: ;;#ASMEND 5197; GFX940-NEXT: ;;#ASMSTART 5198; GFX940-NEXT: ; def v[2:3] 5199; GFX940-NEXT: ;;#ASMEND 5200; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5201; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 5202; GFX940-NEXT: global_store_short v4, v2, s[0:1] offset:4 sc0 sc1 5203; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5204; GFX940-NEXT: s_waitcnt vmcnt(0) 5205; GFX940-NEXT: s_setpc_b64 s[30:31] 5206 %vec0 = call <4 x half> asm "; def $0", "=v"() 5207 %vec1 = call <4 x half> asm "; def $0", "=v"() 5208 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 4> 5209 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5210 ret void 5211} 5212 5213define void @v_shuffle_v3f16_v4f16__7_5_4(ptr addrspace(1) inreg %ptr) { 5214; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_4: 5215; GFX900: ; %bb.0: 5216; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5217; GFX900-NEXT: v_mov_b32_e32 v2, 0 5218; GFX900-NEXT: ;;#ASMSTART 5219; GFX900-NEXT: ; def v[0:1] 5220; GFX900-NEXT: ;;#ASMEND 5221; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5222; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 5223; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5224; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5225; GFX900-NEXT: s_waitcnt vmcnt(0) 5226; GFX900-NEXT: s_setpc_b64 s[30:31] 5227; 5228; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_4: 5229; GFX90A: ; %bb.0: 5230; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5231; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5232; GFX90A-NEXT: ;;#ASMSTART 5233; GFX90A-NEXT: ; def v[0:1] 5234; GFX90A-NEXT: ;;#ASMEND 5235; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5236; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 5237; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5238; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5239; GFX90A-NEXT: s_waitcnt vmcnt(0) 5240; GFX90A-NEXT: s_setpc_b64 s[30:31] 5241; 5242; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_4: 5243; GFX940: ; %bb.0: 5244; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5245; GFX940-NEXT: v_mov_b32_e32 v2, 0 5246; GFX940-NEXT: ;;#ASMSTART 5247; GFX940-NEXT: ; def v[0:1] 5248; GFX940-NEXT: ;;#ASMEND 5249; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5250; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 5251; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5252; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5253; GFX940-NEXT: s_waitcnt vmcnt(0) 5254; GFX940-NEXT: s_setpc_b64 s[30:31] 5255 %vec0 = call <4 x half> asm "; def $0", "=v"() 5256 %vec1 = call <4 x half> asm "; def $0", "=v"() 5257 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 4> 5258 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5259 ret void 5260} 5261 5262define void @v_shuffle_v3f16_v4f16__7_6_4(ptr addrspace(1) inreg %ptr) { 5263; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_4: 5264; GFX900: ; %bb.0: 5265; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5266; GFX900-NEXT: v_mov_b32_e32 v2, 0 5267; GFX900-NEXT: ;;#ASMSTART 5268; GFX900-NEXT: ; def v[0:1] 5269; GFX900-NEXT: ;;#ASMEND 5270; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 5271; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5272; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5273; GFX900-NEXT: s_waitcnt vmcnt(0) 5274; GFX900-NEXT: s_setpc_b64 s[30:31] 5275; 5276; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_4: 5277; GFX90A: ; %bb.0: 5278; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5279; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5280; GFX90A-NEXT: ;;#ASMSTART 5281; GFX90A-NEXT: ; def v[0:1] 5282; GFX90A-NEXT: ;;#ASMEND 5283; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 5284; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5285; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5286; GFX90A-NEXT: s_waitcnt vmcnt(0) 5287; GFX90A-NEXT: s_setpc_b64 s[30:31] 5288; 5289; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_4: 5290; GFX940: ; %bb.0: 5291; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5292; GFX940-NEXT: v_mov_b32_e32 v2, 0 5293; GFX940-NEXT: ;;#ASMSTART 5294; GFX940-NEXT: ; def v[0:1] 5295; GFX940-NEXT: ;;#ASMEND 5296; GFX940-NEXT: s_nop 0 5297; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 5298; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5299; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5300; GFX940-NEXT: s_waitcnt vmcnt(0) 5301; GFX940-NEXT: s_setpc_b64 s[30:31] 5302 %vec0 = call <4 x half> asm "; def $0", "=v"() 5303 %vec1 = call <4 x half> asm "; def $0", "=v"() 5304 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 5305 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5306 ret void 5307} 5308 5309define void @v_shuffle_v3f16_v4f16__u_5_5(ptr addrspace(1) inreg %ptr) { 5310; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_5_5: 5311; GFX900: ; %bb.0: 5312; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5313; GFX900-NEXT: v_mov_b32_e32 v2, 0 5314; GFX900-NEXT: ;;#ASMSTART 5315; GFX900-NEXT: ; def v[0:1] 5316; GFX900-NEXT: ;;#ASMEND 5317; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 5318; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5319; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5320; GFX900-NEXT: s_waitcnt vmcnt(0) 5321; GFX900-NEXT: s_setpc_b64 s[30:31] 5322; 5323; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_5_5: 5324; GFX90A: ; %bb.0: 5325; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5326; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5327; GFX90A-NEXT: ;;#ASMSTART 5328; GFX90A-NEXT: ; def v[0:1] 5329; GFX90A-NEXT: ;;#ASMEND 5330; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 5331; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5332; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5333; GFX90A-NEXT: s_waitcnt vmcnt(0) 5334; GFX90A-NEXT: s_setpc_b64 s[30:31] 5335; 5336; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_5_5: 5337; GFX940: ; %bb.0: 5338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5339; GFX940-NEXT: v_mov_b32_e32 v2, 0 5340; GFX940-NEXT: ;;#ASMSTART 5341; GFX940-NEXT: ; def v[0:1] 5342; GFX940-NEXT: ;;#ASMEND 5343; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 5344; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5345; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5346; GFX940-NEXT: s_waitcnt vmcnt(0) 5347; GFX940-NEXT: s_setpc_b64 s[30:31] 5348 %vec0 = call <4 x half> asm "; def $0", "=v"() 5349 %vec1 = call <4 x half> asm "; def $0", "=v"() 5350 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 5351 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5352 ret void 5353} 5354 5355define void @v_shuffle_v3f16_v4f16__0_5_5(ptr addrspace(1) inreg %ptr) { 5356; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_5_5: 5357; GFX900: ; %bb.0: 5358; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5359; GFX900-NEXT: ;;#ASMSTART 5360; GFX900-NEXT: ; def v[0:1] 5361; GFX900-NEXT: ;;#ASMEND 5362; GFX900-NEXT: s_mov_b32 s4, 0xffff 5363; GFX900-NEXT: v_mov_b32_e32 v3, 0 5364; GFX900-NEXT: ;;#ASMSTART 5365; GFX900-NEXT: ; def v[1:2] 5366; GFX900-NEXT: ;;#ASMEND 5367; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 5368; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5369; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 5370; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 5371; GFX900-NEXT: s_waitcnt vmcnt(0) 5372; GFX900-NEXT: s_setpc_b64 s[30:31] 5373; 5374; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_5_5: 5375; GFX90A: ; %bb.0: 5376; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5377; GFX90A-NEXT: ;;#ASMSTART 5378; GFX90A-NEXT: ; def v[0:1] 5379; GFX90A-NEXT: ;;#ASMEND 5380; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5381; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5382; GFX90A-NEXT: ;;#ASMSTART 5383; GFX90A-NEXT: ; def v[2:3] 5384; GFX90A-NEXT: ;;#ASMEND 5385; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 5386; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5387; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5388; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 5389; GFX90A-NEXT: s_waitcnt vmcnt(0) 5390; GFX90A-NEXT: s_setpc_b64 s[30:31] 5391; 5392; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_5_5: 5393; GFX940: ; %bb.0: 5394; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5395; GFX940-NEXT: ;;#ASMSTART 5396; GFX940-NEXT: ; def v[0:1] 5397; GFX940-NEXT: ;;#ASMEND 5398; GFX940-NEXT: s_mov_b32 s2, 0xffff 5399; GFX940-NEXT: v_mov_b32_e32 v4, 0 5400; GFX940-NEXT: ;;#ASMSTART 5401; GFX940-NEXT: ; def v[2:3] 5402; GFX940-NEXT: ;;#ASMEND 5403; GFX940-NEXT: s_nop 0 5404; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 5405; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5406; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5407; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 5408; GFX940-NEXT: s_waitcnt vmcnt(0) 5409; GFX940-NEXT: s_setpc_b64 s[30:31] 5410 %vec0 = call <4 x half> asm "; def $0", "=v"() 5411 %vec1 = call <4 x half> asm "; def $0", "=v"() 5412 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 5413 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5414 ret void 5415} 5416 5417define void @v_shuffle_v3f16_v4f16__1_5_5(ptr addrspace(1) inreg %ptr) { 5418; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_5_5: 5419; GFX900: ; %bb.0: 5420; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5421; GFX900-NEXT: ;;#ASMSTART 5422; GFX900-NEXT: ; def v[0:1] 5423; GFX900-NEXT: ;;#ASMEND 5424; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5425; GFX900-NEXT: v_mov_b32_e32 v3, 0 5426; GFX900-NEXT: ;;#ASMSTART 5427; GFX900-NEXT: ; def v[1:2] 5428; GFX900-NEXT: ;;#ASMEND 5429; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 5430; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5431; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 5432; GFX900-NEXT: global_store_short v3, v0, s[16:17] offset:4 5433; GFX900-NEXT: s_waitcnt vmcnt(0) 5434; GFX900-NEXT: s_setpc_b64 s[30:31] 5435; 5436; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_5_5: 5437; GFX90A: ; %bb.0: 5438; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5439; GFX90A-NEXT: ;;#ASMSTART 5440; GFX90A-NEXT: ; def v[0:1] 5441; GFX90A-NEXT: ;;#ASMEND 5442; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5443; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5444; GFX90A-NEXT: ;;#ASMSTART 5445; GFX90A-NEXT: ; def v[2:3] 5446; GFX90A-NEXT: ;;#ASMEND 5447; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 5448; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5449; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5450; GFX90A-NEXT: global_store_short v4, v0, s[16:17] offset:4 5451; GFX90A-NEXT: s_waitcnt vmcnt(0) 5452; GFX90A-NEXT: s_setpc_b64 s[30:31] 5453; 5454; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_5_5: 5455; GFX940: ; %bb.0: 5456; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5457; GFX940-NEXT: ;;#ASMSTART 5458; GFX940-NEXT: ; def v[0:1] 5459; GFX940-NEXT: ;;#ASMEND 5460; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5461; GFX940-NEXT: v_mov_b32_e32 v4, 0 5462; GFX940-NEXT: ;;#ASMSTART 5463; GFX940-NEXT: ; def v[2:3] 5464; GFX940-NEXT: ;;#ASMEND 5465; GFX940-NEXT: s_nop 0 5466; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 5467; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5468; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v2 5469; GFX940-NEXT: global_store_short v4, v0, s[0:1] offset:4 sc0 sc1 5470; GFX940-NEXT: s_waitcnt vmcnt(0) 5471; GFX940-NEXT: s_setpc_b64 s[30:31] 5472 %vec0 = call <4 x half> asm "; def $0", "=v"() 5473 %vec1 = call <4 x half> asm "; def $0", "=v"() 5474 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 5475 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5476 ret void 5477} 5478 5479define void @v_shuffle_v3f16_v4f16__2_5_5(ptr addrspace(1) inreg %ptr) { 5480; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_5_5: 5481; GFX900: ; %bb.0: 5482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5483; GFX900-NEXT: ;;#ASMSTART 5484; GFX900-NEXT: ; def v[0:1] 5485; GFX900-NEXT: ;;#ASMEND 5486; GFX900-NEXT: s_mov_b32 s4, 0xffff 5487; GFX900-NEXT: v_mov_b32_e32 v4, 0 5488; GFX900-NEXT: ;;#ASMSTART 5489; GFX900-NEXT: ; def v[2:3] 5490; GFX900-NEXT: ;;#ASMEND 5491; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 5492; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5493; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 5494; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5495; GFX900-NEXT: s_waitcnt vmcnt(0) 5496; GFX900-NEXT: s_setpc_b64 s[30:31] 5497; 5498; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_5_5: 5499; GFX90A: ; %bb.0: 5500; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5501; GFX90A-NEXT: ;;#ASMSTART 5502; GFX90A-NEXT: ; def v[0:1] 5503; GFX90A-NEXT: ;;#ASMEND 5504; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5505; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5506; GFX90A-NEXT: ;;#ASMSTART 5507; GFX90A-NEXT: ; def v[2:3] 5508; GFX90A-NEXT: ;;#ASMEND 5509; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 5510; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5511; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 5512; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5513; GFX90A-NEXT: s_waitcnt vmcnt(0) 5514; GFX90A-NEXT: s_setpc_b64 s[30:31] 5515; 5516; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_5_5: 5517; GFX940: ; %bb.0: 5518; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5519; GFX940-NEXT: ;;#ASMSTART 5520; GFX940-NEXT: ; def v[0:1] 5521; GFX940-NEXT: ;;#ASMEND 5522; GFX940-NEXT: s_mov_b32 s2, 0xffff 5523; GFX940-NEXT: v_mov_b32_e32 v4, 0 5524; GFX940-NEXT: ;;#ASMSTART 5525; GFX940-NEXT: ; def v[2:3] 5526; GFX940-NEXT: ;;#ASMEND 5527; GFX940-NEXT: s_nop 0 5528; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 5529; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5530; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 5531; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5532; GFX940-NEXT: s_waitcnt vmcnt(0) 5533; GFX940-NEXT: s_setpc_b64 s[30:31] 5534 %vec0 = call <4 x half> asm "; def $0", "=v"() 5535 %vec1 = call <4 x half> asm "; def $0", "=v"() 5536 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 5537 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5538 ret void 5539} 5540 5541define void @v_shuffle_v3f16_v4f16__3_5_5(ptr addrspace(1) inreg %ptr) { 5542; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_5_5: 5543; GFX900: ; %bb.0: 5544; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5545; GFX900-NEXT: ;;#ASMSTART 5546; GFX900-NEXT: ; def v[0:1] 5547; GFX900-NEXT: ;;#ASMEND 5548; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5549; GFX900-NEXT: v_mov_b32_e32 v4, 0 5550; GFX900-NEXT: ;;#ASMSTART 5551; GFX900-NEXT: ; def v[2:3] 5552; GFX900-NEXT: ;;#ASMEND 5553; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 5554; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5555; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5556; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 5557; GFX900-NEXT: s_waitcnt vmcnt(0) 5558; GFX900-NEXT: s_setpc_b64 s[30:31] 5559; 5560; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_5_5: 5561; GFX90A: ; %bb.0: 5562; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5563; GFX90A-NEXT: ;;#ASMSTART 5564; GFX90A-NEXT: ; def v[0:1] 5565; GFX90A-NEXT: ;;#ASMEND 5566; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5567; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5568; GFX90A-NEXT: ;;#ASMSTART 5569; GFX90A-NEXT: ; def v[2:3] 5570; GFX90A-NEXT: ;;#ASMEND 5571; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 5572; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5573; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5574; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 5575; GFX90A-NEXT: s_waitcnt vmcnt(0) 5576; GFX90A-NEXT: s_setpc_b64 s[30:31] 5577; 5578; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_5_5: 5579; GFX940: ; %bb.0: 5580; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5581; GFX940-NEXT: ;;#ASMSTART 5582; GFX940-NEXT: ; def v[0:1] 5583; GFX940-NEXT: ;;#ASMEND 5584; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5585; GFX940-NEXT: v_mov_b32_e32 v4, 0 5586; GFX940-NEXT: ;;#ASMSTART 5587; GFX940-NEXT: ; def v[2:3] 5588; GFX940-NEXT: ;;#ASMEND 5589; GFX940-NEXT: s_nop 0 5590; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 5591; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v2 5592; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5593; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 5594; GFX940-NEXT: s_waitcnt vmcnt(0) 5595; GFX940-NEXT: s_setpc_b64 s[30:31] 5596 %vec0 = call <4 x half> asm "; def $0", "=v"() 5597 %vec1 = call <4 x half> asm "; def $0", "=v"() 5598 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 5599 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5600 ret void 5601} 5602 5603define void @v_shuffle_v3f16_v4f16__4_5_5(ptr addrspace(1) inreg %ptr) { 5604; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_5_5: 5605; GFX900: ; %bb.0: 5606; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5607; GFX900-NEXT: v_mov_b32_e32 v2, 0 5608; GFX900-NEXT: ;;#ASMSTART 5609; GFX900-NEXT: ; def v[0:1] 5610; GFX900-NEXT: ;;#ASMEND 5611; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 5612; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5613; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5614; GFX900-NEXT: s_waitcnt vmcnt(0) 5615; GFX900-NEXT: s_setpc_b64 s[30:31] 5616; 5617; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_5_5: 5618; GFX90A: ; %bb.0: 5619; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5620; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5621; GFX90A-NEXT: ;;#ASMSTART 5622; GFX90A-NEXT: ; def v[0:1] 5623; GFX90A-NEXT: ;;#ASMEND 5624; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 5625; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5626; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5627; GFX90A-NEXT: s_waitcnt vmcnt(0) 5628; GFX90A-NEXT: s_setpc_b64 s[30:31] 5629; 5630; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_5_5: 5631; GFX940: ; %bb.0: 5632; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5633; GFX940-NEXT: v_mov_b32_e32 v2, 0 5634; GFX940-NEXT: ;;#ASMSTART 5635; GFX940-NEXT: ; def v[0:1] 5636; GFX940-NEXT: ;;#ASMEND 5637; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 5638; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5639; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5640; GFX940-NEXT: s_waitcnt vmcnt(0) 5641; GFX940-NEXT: s_setpc_b64 s[30:31] 5642 %vec0 = call <4 x half> asm "; def $0", "=v"() 5643 %vec1 = call <4 x half> asm "; def $0", "=v"() 5644 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 5645 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5646 ret void 5647} 5648 5649define void @v_shuffle_v3f16_v4f16__5_5_5(ptr addrspace(1) inreg %ptr) { 5650; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_5_5: 5651; GFX900: ; %bb.0: 5652; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5653; GFX900-NEXT: ;;#ASMSTART 5654; GFX900-NEXT: ; def v[0:1] 5655; GFX900-NEXT: ;;#ASMEND 5656; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5657; GFX900-NEXT: v_mov_b32_e32 v2, 0 5658; GFX900-NEXT: v_perm_b32 v1, v0, v0, s4 5659; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5660; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5661; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5662; GFX900-NEXT: s_waitcnt vmcnt(0) 5663; GFX900-NEXT: s_setpc_b64 s[30:31] 5664; 5665; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_5_5: 5666; GFX90A: ; %bb.0: 5667; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5668; GFX90A-NEXT: ;;#ASMSTART 5669; GFX90A-NEXT: ; def v[0:1] 5670; GFX90A-NEXT: ;;#ASMEND 5671; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5672; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5673; GFX90A-NEXT: v_perm_b32 v1, v0, v0, s4 5674; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5675; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5676; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5677; GFX90A-NEXT: s_waitcnt vmcnt(0) 5678; GFX90A-NEXT: s_setpc_b64 s[30:31] 5679; 5680; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_5_5: 5681; GFX940: ; %bb.0: 5682; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5683; GFX940-NEXT: ;;#ASMSTART 5684; GFX940-NEXT: ; def v[0:1] 5685; GFX940-NEXT: ;;#ASMEND 5686; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5687; GFX940-NEXT: v_mov_b32_e32 v2, 0 5688; GFX940-NEXT: v_perm_b32 v1, v0, v0, s2 5689; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5690; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5691; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5692; GFX940-NEXT: s_waitcnt vmcnt(0) 5693; GFX940-NEXT: s_setpc_b64 s[30:31] 5694 %vec0 = call <4 x half> asm "; def $0", "=v"() 5695 %vec1 = call <4 x half> asm "; def $0", "=v"() 5696 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 5697 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5698 ret void 5699} 5700 5701define void @v_shuffle_v3f16_v4f16__6_5_5(ptr addrspace(1) inreg %ptr) { 5702; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_5_5: 5703; GFX900: ; %bb.0: 5704; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5705; GFX900-NEXT: ;;#ASMSTART 5706; GFX900-NEXT: ; def v[0:1] 5707; GFX900-NEXT: ;;#ASMEND 5708; GFX900-NEXT: s_mov_b32 s4, 0xffff 5709; GFX900-NEXT: v_mov_b32_e32 v2, 0 5710; GFX900-NEXT: v_bfi_b32 v1, s4, v1, v0 5711; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5712; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5713; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5714; GFX900-NEXT: s_waitcnt vmcnt(0) 5715; GFX900-NEXT: s_setpc_b64 s[30:31] 5716; 5717; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_5_5: 5718; GFX90A: ; %bb.0: 5719; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5720; GFX90A-NEXT: ;;#ASMSTART 5721; GFX90A-NEXT: ; def v[0:1] 5722; GFX90A-NEXT: ;;#ASMEND 5723; GFX90A-NEXT: s_mov_b32 s4, 0xffff 5724; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5725; GFX90A-NEXT: v_bfi_b32 v1, s4, v1, v0 5726; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5727; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5728; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5729; GFX90A-NEXT: s_waitcnt vmcnt(0) 5730; GFX90A-NEXT: s_setpc_b64 s[30:31] 5731; 5732; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_5_5: 5733; GFX940: ; %bb.0: 5734; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5735; GFX940-NEXT: ;;#ASMSTART 5736; GFX940-NEXT: ; def v[0:1] 5737; GFX940-NEXT: ;;#ASMEND 5738; GFX940-NEXT: s_mov_b32 s2, 0xffff 5739; GFX940-NEXT: v_mov_b32_e32 v2, 0 5740; GFX940-NEXT: v_bfi_b32 v1, s2, v1, v0 5741; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5742; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5743; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5744; GFX940-NEXT: s_waitcnt vmcnt(0) 5745; GFX940-NEXT: s_setpc_b64 s[30:31] 5746 %vec0 = call <4 x half> asm "; def $0", "=v"() 5747 %vec1 = call <4 x half> asm "; def $0", "=v"() 5748 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 5749 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5750 ret void 5751} 5752 5753define void @v_shuffle_v3f16_v4f16__7_5_5(ptr addrspace(1) inreg %ptr) { 5754; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_5: 5755; GFX900: ; %bb.0: 5756; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5757; GFX900-NEXT: ;;#ASMSTART 5758; GFX900-NEXT: ; def v[0:1] 5759; GFX900-NEXT: ;;#ASMEND 5760; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5761; GFX900-NEXT: v_mov_b32_e32 v2, 0 5762; GFX900-NEXT: v_perm_b32 v1, v0, v1, s4 5763; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5764; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5765; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 5766; GFX900-NEXT: s_waitcnt vmcnt(0) 5767; GFX900-NEXT: s_setpc_b64 s[30:31] 5768; 5769; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_5: 5770; GFX90A: ; %bb.0: 5771; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5772; GFX90A-NEXT: ;;#ASMSTART 5773; GFX90A-NEXT: ; def v[0:1] 5774; GFX90A-NEXT: ;;#ASMEND 5775; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5776; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5777; GFX90A-NEXT: v_perm_b32 v1, v0, v1, s4 5778; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5779; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5780; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 5781; GFX90A-NEXT: s_waitcnt vmcnt(0) 5782; GFX90A-NEXT: s_setpc_b64 s[30:31] 5783; 5784; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_5: 5785; GFX940: ; %bb.0: 5786; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5787; GFX940-NEXT: ;;#ASMSTART 5788; GFX940-NEXT: ; def v[0:1] 5789; GFX940-NEXT: ;;#ASMEND 5790; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5791; GFX940-NEXT: v_mov_b32_e32 v2, 0 5792; GFX940-NEXT: v_perm_b32 v1, v0, v1, s2 5793; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v0 5794; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5795; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 5796; GFX940-NEXT: s_waitcnt vmcnt(0) 5797; GFX940-NEXT: s_setpc_b64 s[30:31] 5798 %vec0 = call <4 x half> asm "; def $0", "=v"() 5799 %vec1 = call <4 x half> asm "; def $0", "=v"() 5800 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 5801 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5802 ret void 5803} 5804 5805define void @v_shuffle_v3f16_v4f16__7_u_5(ptr addrspace(1) inreg %ptr) { 5806; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_5: 5807; GFX900: ; %bb.0: 5808; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5809; GFX900-NEXT: v_mov_b32_e32 v2, 0 5810; GFX900-NEXT: ;;#ASMSTART 5811; GFX900-NEXT: ; def v[0:1] 5812; GFX900-NEXT: ;;#ASMEND 5813; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 5814; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5815; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 5816; GFX900-NEXT: s_waitcnt vmcnt(0) 5817; GFX900-NEXT: s_setpc_b64 s[30:31] 5818; 5819; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_5: 5820; GFX90A: ; %bb.0: 5821; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5822; GFX90A-NEXT: v_mov_b32_e32 v2, 0 5823; GFX90A-NEXT: ;;#ASMSTART 5824; GFX90A-NEXT: ; def v[0:1] 5825; GFX90A-NEXT: ;;#ASMEND 5826; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16 5827; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 5828; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 5829; GFX90A-NEXT: s_waitcnt vmcnt(0) 5830; GFX90A-NEXT: s_setpc_b64 s[30:31] 5831; 5832; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_5: 5833; GFX940: ; %bb.0: 5834; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5835; GFX940-NEXT: v_mov_b32_e32 v2, 0 5836; GFX940-NEXT: ;;#ASMSTART 5837; GFX940-NEXT: ; def v[0:1] 5838; GFX940-NEXT: ;;#ASMEND 5839; GFX940-NEXT: s_nop 0 5840; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 5841; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 5842; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 5843; GFX940-NEXT: s_waitcnt vmcnt(0) 5844; GFX940-NEXT: s_setpc_b64 s[30:31] 5845 %vec0 = call <4 x half> asm "; def $0", "=v"() 5846 %vec1 = call <4 x half> asm "; def $0", "=v"() 5847 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 5848 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5849 ret void 5850} 5851 5852define void @v_shuffle_v3f16_v4f16__7_0_5(ptr addrspace(1) inreg %ptr) { 5853; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_5: 5854; GFX900: ; %bb.0: 5855; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5856; GFX900-NEXT: ;;#ASMSTART 5857; GFX900-NEXT: ; def v[0:1] 5858; GFX900-NEXT: ;;#ASMEND 5859; GFX900-NEXT: v_mov_b32_e32 v3, 0 5860; GFX900-NEXT: ;;#ASMSTART 5861; GFX900-NEXT: ; def v[1:2] 5862; GFX900-NEXT: ;;#ASMEND 5863; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 5864; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 5865; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5866; GFX900-NEXT: s_waitcnt vmcnt(0) 5867; GFX900-NEXT: s_setpc_b64 s[30:31] 5868; 5869; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_5: 5870; GFX90A: ; %bb.0: 5871; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5872; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5873; GFX90A-NEXT: ;;#ASMSTART 5874; GFX90A-NEXT: ; def v[0:1] 5875; GFX90A-NEXT: ;;#ASMEND 5876; GFX90A-NEXT: ;;#ASMSTART 5877; GFX90A-NEXT: ; def v[2:3] 5878; GFX90A-NEXT: ;;#ASMEND 5879; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 5880; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5881; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5882; GFX90A-NEXT: s_waitcnt vmcnt(0) 5883; GFX90A-NEXT: s_setpc_b64 s[30:31] 5884; 5885; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_5: 5886; GFX940: ; %bb.0: 5887; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5888; GFX940-NEXT: v_mov_b32_e32 v4, 0 5889; GFX940-NEXT: ;;#ASMSTART 5890; GFX940-NEXT: ; def v[0:1] 5891; GFX940-NEXT: ;;#ASMEND 5892; GFX940-NEXT: ;;#ASMSTART 5893; GFX940-NEXT: ; def v[2:3] 5894; GFX940-NEXT: ;;#ASMEND 5895; GFX940-NEXT: s_nop 0 5896; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 5897; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 5898; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5899; GFX940-NEXT: s_waitcnt vmcnt(0) 5900; GFX940-NEXT: s_setpc_b64 s[30:31] 5901 %vec0 = call <4 x half> asm "; def $0", "=v"() 5902 %vec1 = call <4 x half> asm "; def $0", "=v"() 5903 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 5904 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5905 ret void 5906} 5907 5908define void @v_shuffle_v3f16_v4f16__7_1_5(ptr addrspace(1) inreg %ptr) { 5909; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_5: 5910; GFX900: ; %bb.0: 5911; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5912; GFX900-NEXT: ;;#ASMSTART 5913; GFX900-NEXT: ; def v[0:1] 5914; GFX900-NEXT: ;;#ASMEND 5915; GFX900-NEXT: v_mov_b32_e32 v3, 0 5916; GFX900-NEXT: ;;#ASMSTART 5917; GFX900-NEXT: ; def v[1:2] 5918; GFX900-NEXT: ;;#ASMEND 5919; GFX900-NEXT: s_mov_b32 s4, 0x7060302 5920; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 5921; GFX900-NEXT: global_store_short_d16_hi v3, v1, s[16:17] offset:4 5922; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 5923; GFX900-NEXT: s_waitcnt vmcnt(0) 5924; GFX900-NEXT: s_setpc_b64 s[30:31] 5925; 5926; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_5: 5927; GFX90A: ; %bb.0: 5928; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5929; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5930; GFX90A-NEXT: ;;#ASMSTART 5931; GFX90A-NEXT: ; def v[0:1] 5932; GFX90A-NEXT: ;;#ASMEND 5933; GFX90A-NEXT: ;;#ASMSTART 5934; GFX90A-NEXT: ; def v[2:3] 5935; GFX90A-NEXT: ;;#ASMEND 5936; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 5937; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 5938; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5939; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5940; GFX90A-NEXT: s_waitcnt vmcnt(0) 5941; GFX90A-NEXT: s_setpc_b64 s[30:31] 5942; 5943; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_5: 5944; GFX940: ; %bb.0: 5945; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5946; GFX940-NEXT: v_mov_b32_e32 v4, 0 5947; GFX940-NEXT: ;;#ASMSTART 5948; GFX940-NEXT: ; def v[0:1] 5949; GFX940-NEXT: ;;#ASMEND 5950; GFX940-NEXT: ;;#ASMSTART 5951; GFX940-NEXT: ; def v[2:3] 5952; GFX940-NEXT: ;;#ASMEND 5953; GFX940-NEXT: s_mov_b32 s2, 0x7060302 5954; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 5955; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 5956; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 5957; GFX940-NEXT: s_waitcnt vmcnt(0) 5958; GFX940-NEXT: s_setpc_b64 s[30:31] 5959 %vec0 = call <4 x half> asm "; def $0", "=v"() 5960 %vec1 = call <4 x half> asm "; def $0", "=v"() 5961 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 5962 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 5963 ret void 5964} 5965 5966define void @v_shuffle_v3f16_v4f16__7_2_5(ptr addrspace(1) inreg %ptr) { 5967; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_5: 5968; GFX900: ; %bb.0: 5969; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5970; GFX900-NEXT: v_mov_b32_e32 v4, 0 5971; GFX900-NEXT: ;;#ASMSTART 5972; GFX900-NEXT: ; def v[0:1] 5973; GFX900-NEXT: ;;#ASMEND 5974; GFX900-NEXT: ;;#ASMSTART 5975; GFX900-NEXT: ; def v[2:3] 5976; GFX900-NEXT: ;;#ASMEND 5977; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 5978; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5979; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 5980; GFX900-NEXT: s_waitcnt vmcnt(0) 5981; GFX900-NEXT: s_setpc_b64 s[30:31] 5982; 5983; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_5: 5984; GFX90A: ; %bb.0: 5985; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5986; GFX90A-NEXT: v_mov_b32_e32 v4, 0 5987; GFX90A-NEXT: ;;#ASMSTART 5988; GFX90A-NEXT: ; def v[0:1] 5989; GFX90A-NEXT: ;;#ASMEND 5990; GFX90A-NEXT: ;;#ASMSTART 5991; GFX90A-NEXT: ; def v[2:3] 5992; GFX90A-NEXT: ;;#ASMEND 5993; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 5994; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 5995; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 5996; GFX90A-NEXT: s_waitcnt vmcnt(0) 5997; GFX90A-NEXT: s_setpc_b64 s[30:31] 5998; 5999; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_5: 6000; GFX940: ; %bb.0: 6001; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6002; GFX940-NEXT: v_mov_b32_e32 v4, 0 6003; GFX940-NEXT: ;;#ASMSTART 6004; GFX940-NEXT: ; def v[0:1] 6005; GFX940-NEXT: ;;#ASMEND 6006; GFX940-NEXT: ;;#ASMSTART 6007; GFX940-NEXT: ; def v[2:3] 6008; GFX940-NEXT: ;;#ASMEND 6009; GFX940-NEXT: s_nop 0 6010; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 6011; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 6012; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6013; GFX940-NEXT: s_waitcnt vmcnt(0) 6014; GFX940-NEXT: s_setpc_b64 s[30:31] 6015 %vec0 = call <4 x half> asm "; def $0", "=v"() 6016 %vec1 = call <4 x half> asm "; def $0", "=v"() 6017 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 6018 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6019 ret void 6020} 6021 6022define void @v_shuffle_v3f16_v4f16__7_3_5(ptr addrspace(1) inreg %ptr) { 6023; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_5: 6024; GFX900: ; %bb.0: 6025; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6026; GFX900-NEXT: v_mov_b32_e32 v4, 0 6027; GFX900-NEXT: ;;#ASMSTART 6028; GFX900-NEXT: ; def v[0:1] 6029; GFX900-NEXT: ;;#ASMEND 6030; GFX900-NEXT: ;;#ASMSTART 6031; GFX900-NEXT: ; def v[2:3] 6032; GFX900-NEXT: ;;#ASMEND 6033; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6034; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6035; GFX900-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 6036; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6037; GFX900-NEXT: s_waitcnt vmcnt(0) 6038; GFX900-NEXT: s_setpc_b64 s[30:31] 6039; 6040; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_5: 6041; GFX90A: ; %bb.0: 6042; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6043; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6044; GFX90A-NEXT: ;;#ASMSTART 6045; GFX90A-NEXT: ; def v[0:1] 6046; GFX90A-NEXT: ;;#ASMEND 6047; GFX90A-NEXT: ;;#ASMSTART 6048; GFX90A-NEXT: ; def v[2:3] 6049; GFX90A-NEXT: ;;#ASMEND 6050; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6051; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6052; GFX90A-NEXT: global_store_short_d16_hi v4, v2, s[16:17] offset:4 6053; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6054; GFX90A-NEXT: s_waitcnt vmcnt(0) 6055; GFX90A-NEXT: s_setpc_b64 s[30:31] 6056; 6057; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_5: 6058; GFX940: ; %bb.0: 6059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6060; GFX940-NEXT: v_mov_b32_e32 v4, 0 6061; GFX940-NEXT: ;;#ASMSTART 6062; GFX940-NEXT: ; def v[0:1] 6063; GFX940-NEXT: ;;#ASMEND 6064; GFX940-NEXT: ;;#ASMSTART 6065; GFX940-NEXT: ; def v[2:3] 6066; GFX940-NEXT: ;;#ASMEND 6067; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6068; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 6069; GFX940-NEXT: global_store_short_d16_hi v4, v2, s[0:1] offset:4 sc0 sc1 6070; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6071; GFX940-NEXT: s_waitcnt vmcnt(0) 6072; GFX940-NEXT: s_setpc_b64 s[30:31] 6073 %vec0 = call <4 x half> asm "; def $0", "=v"() 6074 %vec1 = call <4 x half> asm "; def $0", "=v"() 6075 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 6076 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6077 ret void 6078} 6079 6080define void @v_shuffle_v3f16_v4f16__7_4_5(ptr addrspace(1) inreg %ptr) { 6081; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_5: 6082; GFX900: ; %bb.0: 6083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6084; GFX900-NEXT: v_mov_b32_e32 v2, 0 6085; GFX900-NEXT: ;;#ASMSTART 6086; GFX900-NEXT: ; def v[0:1] 6087; GFX900-NEXT: ;;#ASMEND 6088; GFX900-NEXT: v_alignbit_b32 v1, v0, v1, 16 6089; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6090; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 6091; GFX900-NEXT: s_waitcnt vmcnt(0) 6092; GFX900-NEXT: s_setpc_b64 s[30:31] 6093; 6094; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_5: 6095; GFX90A: ; %bb.0: 6096; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6097; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6098; GFX90A-NEXT: ;;#ASMSTART 6099; GFX90A-NEXT: ; def v[0:1] 6100; GFX90A-NEXT: ;;#ASMEND 6101; GFX90A-NEXT: v_alignbit_b32 v1, v0, v1, 16 6102; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6103; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 6104; GFX90A-NEXT: s_waitcnt vmcnt(0) 6105; GFX90A-NEXT: s_setpc_b64 s[30:31] 6106; 6107; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_5: 6108; GFX940: ; %bb.0: 6109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6110; GFX940-NEXT: v_mov_b32_e32 v2, 0 6111; GFX940-NEXT: ;;#ASMSTART 6112; GFX940-NEXT: ; def v[0:1] 6113; GFX940-NEXT: ;;#ASMEND 6114; GFX940-NEXT: s_nop 0 6115; GFX940-NEXT: v_alignbit_b32 v1, v0, v1, 16 6116; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 6117; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 6118; GFX940-NEXT: s_waitcnt vmcnt(0) 6119; GFX940-NEXT: s_setpc_b64 s[30:31] 6120 %vec0 = call <4 x half> asm "; def $0", "=v"() 6121 %vec1 = call <4 x half> asm "; def $0", "=v"() 6122 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 6123 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6124 ret void 6125} 6126 6127define void @v_shuffle_v3f16_v4f16__7_6_5(ptr addrspace(1) inreg %ptr) { 6128; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_5: 6129; GFX900: ; %bb.0: 6130; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6131; GFX900-NEXT: v_mov_b32_e32 v2, 0 6132; GFX900-NEXT: ;;#ASMSTART 6133; GFX900-NEXT: ; def v[0:1] 6134; GFX900-NEXT: ;;#ASMEND 6135; GFX900-NEXT: v_alignbit_b32 v1, v1, v1, 16 6136; GFX900-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6137; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 6138; GFX900-NEXT: s_waitcnt vmcnt(0) 6139; GFX900-NEXT: s_setpc_b64 s[30:31] 6140; 6141; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_5: 6142; GFX90A: ; %bb.0: 6143; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6144; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6145; GFX90A-NEXT: ;;#ASMSTART 6146; GFX90A-NEXT: ; def v[0:1] 6147; GFX90A-NEXT: ;;#ASMEND 6148; GFX90A-NEXT: v_alignbit_b32 v1, v1, v1, 16 6149; GFX90A-NEXT: global_store_short_d16_hi v2, v0, s[16:17] offset:4 6150; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 6151; GFX90A-NEXT: s_waitcnt vmcnt(0) 6152; GFX90A-NEXT: s_setpc_b64 s[30:31] 6153; 6154; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_5: 6155; GFX940: ; %bb.0: 6156; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6157; GFX940-NEXT: v_mov_b32_e32 v2, 0 6158; GFX940-NEXT: ;;#ASMSTART 6159; GFX940-NEXT: ; def v[0:1] 6160; GFX940-NEXT: ;;#ASMEND 6161; GFX940-NEXT: s_nop 0 6162; GFX940-NEXT: v_alignbit_b32 v1, v1, v1, 16 6163; GFX940-NEXT: global_store_short_d16_hi v2, v0, s[0:1] offset:4 sc0 sc1 6164; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 6165; GFX940-NEXT: s_waitcnt vmcnt(0) 6166; GFX940-NEXT: s_setpc_b64 s[30:31] 6167 %vec0 = call <4 x half> asm "; def $0", "=v"() 6168 %vec1 = call <4 x half> asm "; def $0", "=v"() 6169 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 6170 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6171 ret void 6172} 6173 6174define void @v_shuffle_v3f16_v4f16__u_6_6(ptr addrspace(1) inreg %ptr) { 6175; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_6_6: 6176; GFX900: ; %bb.0: 6177; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6178; GFX900-NEXT: v_mov_b32_e32 v2, 0 6179; GFX900-NEXT: ;;#ASMSTART 6180; GFX900-NEXT: ; def v[0:1] 6181; GFX900-NEXT: ;;#ASMEND 6182; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6183; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6184; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6185; GFX900-NEXT: s_waitcnt vmcnt(0) 6186; GFX900-NEXT: s_setpc_b64 s[30:31] 6187; 6188; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_6_6: 6189; GFX90A: ; %bb.0: 6190; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6191; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6192; GFX90A-NEXT: ;;#ASMSTART 6193; GFX90A-NEXT: ; def v[0:1] 6194; GFX90A-NEXT: ;;#ASMEND 6195; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6196; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6197; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6198; GFX90A-NEXT: s_waitcnt vmcnt(0) 6199; GFX90A-NEXT: s_setpc_b64 s[30:31] 6200; 6201; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_6_6: 6202; GFX940: ; %bb.0: 6203; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6204; GFX940-NEXT: v_mov_b32_e32 v2, 0 6205; GFX940-NEXT: ;;#ASMSTART 6206; GFX940-NEXT: ; def v[0:1] 6207; GFX940-NEXT: ;;#ASMEND 6208; GFX940-NEXT: s_nop 0 6209; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 6210; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6211; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6212; GFX940-NEXT: s_waitcnt vmcnt(0) 6213; GFX940-NEXT: s_setpc_b64 s[30:31] 6214 %vec0 = call <4 x half> asm "; def $0", "=v"() 6215 %vec1 = call <4 x half> asm "; def $0", "=v"() 6216 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 6217 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6218 ret void 6219} 6220 6221define void @v_shuffle_v3f16_v4f16__0_6_6(ptr addrspace(1) inreg %ptr) { 6222; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_6_6: 6223; GFX900: ; %bb.0: 6224; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6225; GFX900-NEXT: ;;#ASMSTART 6226; GFX900-NEXT: ; def v[0:1] 6227; GFX900-NEXT: ;;#ASMEND 6228; GFX900-NEXT: v_mov_b32_e32 v3, 0 6229; GFX900-NEXT: ;;#ASMSTART 6230; GFX900-NEXT: ; def v[1:2] 6231; GFX900-NEXT: ;;#ASMEND 6232; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6233; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 6234; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6235; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6236; GFX900-NEXT: s_waitcnt vmcnt(0) 6237; GFX900-NEXT: s_setpc_b64 s[30:31] 6238; 6239; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_6_6: 6240; GFX90A: ; %bb.0: 6241; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6242; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6243; GFX90A-NEXT: ;;#ASMSTART 6244; GFX90A-NEXT: ; def v[0:1] 6245; GFX90A-NEXT: ;;#ASMEND 6246; GFX90A-NEXT: ;;#ASMSTART 6247; GFX90A-NEXT: ; def v[2:3] 6248; GFX90A-NEXT: ;;#ASMEND 6249; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6250; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 6251; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6252; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6253; GFX90A-NEXT: s_waitcnt vmcnt(0) 6254; GFX90A-NEXT: s_setpc_b64 s[30:31] 6255; 6256; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_6_6: 6257; GFX940: ; %bb.0: 6258; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6259; GFX940-NEXT: v_mov_b32_e32 v4, 0 6260; GFX940-NEXT: ;;#ASMSTART 6261; GFX940-NEXT: ; def v[0:1] 6262; GFX940-NEXT: ;;#ASMEND 6263; GFX940-NEXT: ;;#ASMSTART 6264; GFX940-NEXT: ; def v[2:3] 6265; GFX940-NEXT: ;;#ASMEND 6266; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6267; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 6268; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6269; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6270; GFX940-NEXT: s_waitcnt vmcnt(0) 6271; GFX940-NEXT: s_setpc_b64 s[30:31] 6272 %vec0 = call <4 x half> asm "; def $0", "=v"() 6273 %vec1 = call <4 x half> asm "; def $0", "=v"() 6274 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 6275 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6276 ret void 6277} 6278 6279define void @v_shuffle_v3f16_v4f16__1_6_6(ptr addrspace(1) inreg %ptr) { 6280; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_6_6: 6281; GFX900: ; %bb.0: 6282; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6283; GFX900-NEXT: ;;#ASMSTART 6284; GFX900-NEXT: ; def v[0:1] 6285; GFX900-NEXT: ;;#ASMEND 6286; GFX900-NEXT: v_mov_b32_e32 v3, 0 6287; GFX900-NEXT: ;;#ASMSTART 6288; GFX900-NEXT: ; def v[1:2] 6289; GFX900-NEXT: ;;#ASMEND 6290; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 6291; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6292; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6293; GFX900-NEXT: s_waitcnt vmcnt(0) 6294; GFX900-NEXT: s_setpc_b64 s[30:31] 6295; 6296; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_6_6: 6297; GFX90A: ; %bb.0: 6298; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6299; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6300; GFX90A-NEXT: ;;#ASMSTART 6301; GFX90A-NEXT: ; def v[0:1] 6302; GFX90A-NEXT: ;;#ASMEND 6303; GFX90A-NEXT: ;;#ASMSTART 6304; GFX90A-NEXT: ; def v[2:3] 6305; GFX90A-NEXT: ;;#ASMEND 6306; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 6307; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6308; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6309; GFX90A-NEXT: s_waitcnt vmcnt(0) 6310; GFX90A-NEXT: s_setpc_b64 s[30:31] 6311; 6312; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_6_6: 6313; GFX940: ; %bb.0: 6314; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6315; GFX940-NEXT: v_mov_b32_e32 v4, 0 6316; GFX940-NEXT: ;;#ASMSTART 6317; GFX940-NEXT: ; def v[0:1] 6318; GFX940-NEXT: ;;#ASMEND 6319; GFX940-NEXT: ;;#ASMSTART 6320; GFX940-NEXT: ; def v[2:3] 6321; GFX940-NEXT: ;;#ASMEND 6322; GFX940-NEXT: s_nop 0 6323; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 6324; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6325; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6326; GFX940-NEXT: s_waitcnt vmcnt(0) 6327; GFX940-NEXT: s_setpc_b64 s[30:31] 6328 %vec0 = call <4 x half> asm "; def $0", "=v"() 6329 %vec1 = call <4 x half> asm "; def $0", "=v"() 6330 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 6331 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6332 ret void 6333} 6334 6335define void @v_shuffle_v3f16_v4f16__2_6_6(ptr addrspace(1) inreg %ptr) { 6336; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_6_6: 6337; GFX900: ; %bb.0: 6338; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6339; GFX900-NEXT: v_mov_b32_e32 v4, 0 6340; GFX900-NEXT: ;;#ASMSTART 6341; GFX900-NEXT: ; def v[0:1] 6342; GFX900-NEXT: ;;#ASMEND 6343; GFX900-NEXT: ;;#ASMSTART 6344; GFX900-NEXT: ; def v[2:3] 6345; GFX900-NEXT: ;;#ASMEND 6346; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6347; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 6348; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6349; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6350; GFX900-NEXT: s_waitcnt vmcnt(0) 6351; GFX900-NEXT: s_setpc_b64 s[30:31] 6352; 6353; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_6_6: 6354; GFX90A: ; %bb.0: 6355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6356; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6357; GFX90A-NEXT: ;;#ASMSTART 6358; GFX90A-NEXT: ; def v[0:1] 6359; GFX90A-NEXT: ;;#ASMEND 6360; GFX90A-NEXT: ;;#ASMSTART 6361; GFX90A-NEXT: ; def v[2:3] 6362; GFX90A-NEXT: ;;#ASMEND 6363; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6364; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 6365; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6366; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6367; GFX90A-NEXT: s_waitcnt vmcnt(0) 6368; GFX90A-NEXT: s_setpc_b64 s[30:31] 6369; 6370; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_6_6: 6371; GFX940: ; %bb.0: 6372; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6373; GFX940-NEXT: v_mov_b32_e32 v4, 0 6374; GFX940-NEXT: ;;#ASMSTART 6375; GFX940-NEXT: ; def v[0:1] 6376; GFX940-NEXT: ;;#ASMEND 6377; GFX940-NEXT: ;;#ASMSTART 6378; GFX940-NEXT: ; def v[2:3] 6379; GFX940-NEXT: ;;#ASMEND 6380; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6381; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 6382; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6383; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6384; GFX940-NEXT: s_waitcnt vmcnt(0) 6385; GFX940-NEXT: s_setpc_b64 s[30:31] 6386 %vec0 = call <4 x half> asm "; def $0", "=v"() 6387 %vec1 = call <4 x half> asm "; def $0", "=v"() 6388 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 6389 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6390 ret void 6391} 6392 6393define void @v_shuffle_v3f16_v4f16__3_6_6(ptr addrspace(1) inreg %ptr) { 6394; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_6_6: 6395; GFX900: ; %bb.0: 6396; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6397; GFX900-NEXT: v_mov_b32_e32 v4, 0 6398; GFX900-NEXT: ;;#ASMSTART 6399; GFX900-NEXT: ; def v[0:1] 6400; GFX900-NEXT: ;;#ASMEND 6401; GFX900-NEXT: ;;#ASMSTART 6402; GFX900-NEXT: ; def v[2:3] 6403; GFX900-NEXT: ;;#ASMEND 6404; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 6405; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6406; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6407; GFX900-NEXT: s_waitcnt vmcnt(0) 6408; GFX900-NEXT: s_setpc_b64 s[30:31] 6409; 6410; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_6_6: 6411; GFX90A: ; %bb.0: 6412; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6413; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6414; GFX90A-NEXT: ;;#ASMSTART 6415; GFX90A-NEXT: ; def v[0:1] 6416; GFX90A-NEXT: ;;#ASMEND 6417; GFX90A-NEXT: ;;#ASMSTART 6418; GFX90A-NEXT: ; def v[2:3] 6419; GFX90A-NEXT: ;;#ASMEND 6420; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 6421; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6422; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6423; GFX90A-NEXT: s_waitcnt vmcnt(0) 6424; GFX90A-NEXT: s_setpc_b64 s[30:31] 6425; 6426; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_6_6: 6427; GFX940: ; %bb.0: 6428; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6429; GFX940-NEXT: v_mov_b32_e32 v4, 0 6430; GFX940-NEXT: ;;#ASMSTART 6431; GFX940-NEXT: ; def v[0:1] 6432; GFX940-NEXT: ;;#ASMEND 6433; GFX940-NEXT: ;;#ASMSTART 6434; GFX940-NEXT: ; def v[2:3] 6435; GFX940-NEXT: ;;#ASMEND 6436; GFX940-NEXT: s_nop 0 6437; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 6438; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6439; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6440; GFX940-NEXT: s_waitcnt vmcnt(0) 6441; GFX940-NEXT: s_setpc_b64 s[30:31] 6442 %vec0 = call <4 x half> asm "; def $0", "=v"() 6443 %vec1 = call <4 x half> asm "; def $0", "=v"() 6444 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 6445 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6446 ret void 6447} 6448 6449define void @v_shuffle_v3f16_v4f16__4_6_6(ptr addrspace(1) inreg %ptr) { 6450; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_6_6: 6451; GFX900: ; %bb.0: 6452; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6453; GFX900-NEXT: v_mov_b32_e32 v2, 0 6454; GFX900-NEXT: ;;#ASMSTART 6455; GFX900-NEXT: ; def v[0:1] 6456; GFX900-NEXT: ;;#ASMEND 6457; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6458; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 6459; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6460; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6461; GFX900-NEXT: s_waitcnt vmcnt(0) 6462; GFX900-NEXT: s_setpc_b64 s[30:31] 6463; 6464; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_6_6: 6465; GFX90A: ; %bb.0: 6466; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6467; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6468; GFX90A-NEXT: ;;#ASMSTART 6469; GFX90A-NEXT: ; def v[0:1] 6470; GFX90A-NEXT: ;;#ASMEND 6471; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6472; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 6473; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6474; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6475; GFX90A-NEXT: s_waitcnt vmcnt(0) 6476; GFX90A-NEXT: s_setpc_b64 s[30:31] 6477; 6478; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_6_6: 6479; GFX940: ; %bb.0: 6480; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6481; GFX940-NEXT: v_mov_b32_e32 v2, 0 6482; GFX940-NEXT: ;;#ASMSTART 6483; GFX940-NEXT: ; def v[0:1] 6484; GFX940-NEXT: ;;#ASMEND 6485; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6486; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 6487; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6488; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6489; GFX940-NEXT: s_waitcnt vmcnt(0) 6490; GFX940-NEXT: s_setpc_b64 s[30:31] 6491 %vec0 = call <4 x half> asm "; def $0", "=v"() 6492 %vec1 = call <4 x half> asm "; def $0", "=v"() 6493 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 6494 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6495 ret void 6496} 6497 6498define void @v_shuffle_v3f16_v4f16__5_6_6(ptr addrspace(1) inreg %ptr) { 6499; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_6_6: 6500; GFX900: ; %bb.0: 6501; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6502; GFX900-NEXT: v_mov_b32_e32 v2, 0 6503; GFX900-NEXT: ;;#ASMSTART 6504; GFX900-NEXT: ; def v[0:1] 6505; GFX900-NEXT: ;;#ASMEND 6506; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 6507; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6508; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6509; GFX900-NEXT: s_waitcnt vmcnt(0) 6510; GFX900-NEXT: s_setpc_b64 s[30:31] 6511; 6512; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_6_6: 6513; GFX90A: ; %bb.0: 6514; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6515; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6516; GFX90A-NEXT: ;;#ASMSTART 6517; GFX90A-NEXT: ; def v[0:1] 6518; GFX90A-NEXT: ;;#ASMEND 6519; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 6520; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6521; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6522; GFX90A-NEXT: s_waitcnt vmcnt(0) 6523; GFX90A-NEXT: s_setpc_b64 s[30:31] 6524; 6525; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_6_6: 6526; GFX940: ; %bb.0: 6527; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6528; GFX940-NEXT: v_mov_b32_e32 v2, 0 6529; GFX940-NEXT: ;;#ASMSTART 6530; GFX940-NEXT: ; def v[0:1] 6531; GFX940-NEXT: ;;#ASMEND 6532; GFX940-NEXT: s_nop 0 6533; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 6534; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6535; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6536; GFX940-NEXT: s_waitcnt vmcnt(0) 6537; GFX940-NEXT: s_setpc_b64 s[30:31] 6538 %vec0 = call <4 x half> asm "; def $0", "=v"() 6539 %vec1 = call <4 x half> asm "; def $0", "=v"() 6540 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 6541 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6542 ret void 6543} 6544 6545define void @v_shuffle_v3f16_v4f16__6_6_6(ptr addrspace(1) inreg %ptr) { 6546; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_6_6: 6547; GFX900: ; %bb.0: 6548; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6549; GFX900-NEXT: v_mov_b32_e32 v2, 0 6550; GFX900-NEXT: ;;#ASMSTART 6551; GFX900-NEXT: ; def v[0:1] 6552; GFX900-NEXT: ;;#ASMEND 6553; GFX900-NEXT: s_mov_b32 s4, 0x5040100 6554; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 6555; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6556; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6557; GFX900-NEXT: s_waitcnt vmcnt(0) 6558; GFX900-NEXT: s_setpc_b64 s[30:31] 6559; 6560; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_6_6: 6561; GFX90A: ; %bb.0: 6562; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6563; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6564; GFX90A-NEXT: ;;#ASMSTART 6565; GFX90A-NEXT: ; def v[0:1] 6566; GFX90A-NEXT: ;;#ASMEND 6567; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 6568; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 6569; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6570; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6571; GFX90A-NEXT: s_waitcnt vmcnt(0) 6572; GFX90A-NEXT: s_setpc_b64 s[30:31] 6573; 6574; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_6_6: 6575; GFX940: ; %bb.0: 6576; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6577; GFX940-NEXT: v_mov_b32_e32 v2, 0 6578; GFX940-NEXT: ;;#ASMSTART 6579; GFX940-NEXT: ; def v[0:1] 6580; GFX940-NEXT: ;;#ASMEND 6581; GFX940-NEXT: s_mov_b32 s2, 0x5040100 6582; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 6583; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6584; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6585; GFX940-NEXT: s_waitcnt vmcnt(0) 6586; GFX940-NEXT: s_setpc_b64 s[30:31] 6587 %vec0 = call <4 x half> asm "; def $0", "=v"() 6588 %vec1 = call <4 x half> asm "; def $0", "=v"() 6589 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 6590 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6591 ret void 6592} 6593 6594define void @v_shuffle_v3f16_v4f16__7_6_6(ptr addrspace(1) inreg %ptr) { 6595; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_6: 6596; GFX900: ; %bb.0: 6597; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6598; GFX900-NEXT: v_mov_b32_e32 v2, 0 6599; GFX900-NEXT: ;;#ASMSTART 6600; GFX900-NEXT: ; def v[0:1] 6601; GFX900-NEXT: ;;#ASMEND 6602; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 6603; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6604; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6605; GFX900-NEXT: s_waitcnt vmcnt(0) 6606; GFX900-NEXT: s_setpc_b64 s[30:31] 6607; 6608; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_6: 6609; GFX90A: ; %bb.0: 6610; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6611; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6612; GFX90A-NEXT: ;;#ASMSTART 6613; GFX90A-NEXT: ; def v[0:1] 6614; GFX90A-NEXT: ;;#ASMEND 6615; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 6616; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6617; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6618; GFX90A-NEXT: s_waitcnt vmcnt(0) 6619; GFX90A-NEXT: s_setpc_b64 s[30:31] 6620; 6621; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_6: 6622; GFX940: ; %bb.0: 6623; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6624; GFX940-NEXT: v_mov_b32_e32 v2, 0 6625; GFX940-NEXT: ;;#ASMSTART 6626; GFX940-NEXT: ; def v[0:1] 6627; GFX940-NEXT: ;;#ASMEND 6628; GFX940-NEXT: s_nop 0 6629; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 6630; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6631; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6632; GFX940-NEXT: s_waitcnt vmcnt(0) 6633; GFX940-NEXT: s_setpc_b64 s[30:31] 6634 %vec0 = call <4 x half> asm "; def $0", "=v"() 6635 %vec1 = call <4 x half> asm "; def $0", "=v"() 6636 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 6637 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6638 ret void 6639} 6640 6641define void @v_shuffle_v3f16_v4f16__7_u_6(ptr addrspace(1) inreg %ptr) { 6642; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_6: 6643; GFX900: ; %bb.0: 6644; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6645; GFX900-NEXT: v_mov_b32_e32 v2, 0 6646; GFX900-NEXT: ;;#ASMSTART 6647; GFX900-NEXT: ; def v[0:1] 6648; GFX900-NEXT: ;;#ASMEND 6649; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 6650; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6651; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6652; GFX900-NEXT: s_waitcnt vmcnt(0) 6653; GFX900-NEXT: s_setpc_b64 s[30:31] 6654; 6655; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_6: 6656; GFX90A: ; %bb.0: 6657; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6658; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6659; GFX90A-NEXT: ;;#ASMSTART 6660; GFX90A-NEXT: ; def v[0:1] 6661; GFX90A-NEXT: ;;#ASMEND 6662; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 6663; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6664; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6665; GFX90A-NEXT: s_waitcnt vmcnt(0) 6666; GFX90A-NEXT: s_setpc_b64 s[30:31] 6667; 6668; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_6: 6669; GFX940: ; %bb.0: 6670; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6671; GFX940-NEXT: v_mov_b32_e32 v2, 0 6672; GFX940-NEXT: ;;#ASMSTART 6673; GFX940-NEXT: ; def v[0:1] 6674; GFX940-NEXT: ;;#ASMEND 6675; GFX940-NEXT: s_nop 0 6676; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 6677; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6678; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6679; GFX940-NEXT: s_waitcnt vmcnt(0) 6680; GFX940-NEXT: s_setpc_b64 s[30:31] 6681 %vec0 = call <4 x half> asm "; def $0", "=v"() 6682 %vec1 = call <4 x half> asm "; def $0", "=v"() 6683 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 6684 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6685 ret void 6686} 6687 6688define void @v_shuffle_v3f16_v4f16__7_0_6(ptr addrspace(1) inreg %ptr) { 6689; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_6: 6690; GFX900: ; %bb.0: 6691; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6692; GFX900-NEXT: ;;#ASMSTART 6693; GFX900-NEXT: ; def v[0:1] 6694; GFX900-NEXT: ;;#ASMEND 6695; GFX900-NEXT: v_mov_b32_e32 v3, 0 6696; GFX900-NEXT: ;;#ASMSTART 6697; GFX900-NEXT: ; def v[1:2] 6698; GFX900-NEXT: ;;#ASMEND 6699; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 6700; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6701; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6702; GFX900-NEXT: s_waitcnt vmcnt(0) 6703; GFX900-NEXT: s_setpc_b64 s[30:31] 6704; 6705; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_6: 6706; GFX90A: ; %bb.0: 6707; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6708; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6709; GFX90A-NEXT: ;;#ASMSTART 6710; GFX90A-NEXT: ; def v[0:1] 6711; GFX90A-NEXT: ;;#ASMEND 6712; GFX90A-NEXT: ;;#ASMSTART 6713; GFX90A-NEXT: ; def v[2:3] 6714; GFX90A-NEXT: ;;#ASMEND 6715; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 6716; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6717; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6718; GFX90A-NEXT: s_waitcnt vmcnt(0) 6719; GFX90A-NEXT: s_setpc_b64 s[30:31] 6720; 6721; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_6: 6722; GFX940: ; %bb.0: 6723; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6724; GFX940-NEXT: v_mov_b32_e32 v4, 0 6725; GFX940-NEXT: ;;#ASMSTART 6726; GFX940-NEXT: ; def v[0:1] 6727; GFX940-NEXT: ;;#ASMEND 6728; GFX940-NEXT: ;;#ASMSTART 6729; GFX940-NEXT: ; def v[2:3] 6730; GFX940-NEXT: ;;#ASMEND 6731; GFX940-NEXT: s_nop 0 6732; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 6733; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6734; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6735; GFX940-NEXT: s_waitcnt vmcnt(0) 6736; GFX940-NEXT: s_setpc_b64 s[30:31] 6737 %vec0 = call <4 x half> asm "; def $0", "=v"() 6738 %vec1 = call <4 x half> asm "; def $0", "=v"() 6739 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 6740 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6741 ret void 6742} 6743 6744define void @v_shuffle_v3f16_v4f16__7_1_6(ptr addrspace(1) inreg %ptr) { 6745; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_6: 6746; GFX900: ; %bb.0: 6747; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6748; GFX900-NEXT: ;;#ASMSTART 6749; GFX900-NEXT: ; def v[0:1] 6750; GFX900-NEXT: ;;#ASMEND 6751; GFX900-NEXT: v_mov_b32_e32 v3, 0 6752; GFX900-NEXT: ;;#ASMSTART 6753; GFX900-NEXT: ; def v[1:2] 6754; GFX900-NEXT: ;;#ASMEND 6755; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6756; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 6757; GFX900-NEXT: global_store_short v3, v2, s[16:17] offset:4 6758; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 6759; GFX900-NEXT: s_waitcnt vmcnt(0) 6760; GFX900-NEXT: s_setpc_b64 s[30:31] 6761; 6762; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_6: 6763; GFX90A: ; %bb.0: 6764; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6765; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6766; GFX90A-NEXT: ;;#ASMSTART 6767; GFX90A-NEXT: ; def v[0:1] 6768; GFX90A-NEXT: ;;#ASMEND 6769; GFX90A-NEXT: ;;#ASMSTART 6770; GFX90A-NEXT: ; def v[2:3] 6771; GFX90A-NEXT: ;;#ASMEND 6772; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6773; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 6774; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6775; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6776; GFX90A-NEXT: s_waitcnt vmcnt(0) 6777; GFX90A-NEXT: s_setpc_b64 s[30:31] 6778; 6779; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_6: 6780; GFX940: ; %bb.0: 6781; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6782; GFX940-NEXT: v_mov_b32_e32 v4, 0 6783; GFX940-NEXT: ;;#ASMSTART 6784; GFX940-NEXT: ; def v[0:1] 6785; GFX940-NEXT: ;;#ASMEND 6786; GFX940-NEXT: ;;#ASMSTART 6787; GFX940-NEXT: ; def v[2:3] 6788; GFX940-NEXT: ;;#ASMEND 6789; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6790; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 6791; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6792; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6793; GFX940-NEXT: s_waitcnt vmcnt(0) 6794; GFX940-NEXT: s_setpc_b64 s[30:31] 6795 %vec0 = call <4 x half> asm "; def $0", "=v"() 6796 %vec1 = call <4 x half> asm "; def $0", "=v"() 6797 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 6798 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6799 ret void 6800} 6801 6802define void @v_shuffle_v3f16_v4f16__7_2_6(ptr addrspace(1) inreg %ptr) { 6803; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_6: 6804; GFX900: ; %bb.0: 6805; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6806; GFX900-NEXT: v_mov_b32_e32 v4, 0 6807; GFX900-NEXT: ;;#ASMSTART 6808; GFX900-NEXT: ; def v[0:1] 6809; GFX900-NEXT: ;;#ASMEND 6810; GFX900-NEXT: ;;#ASMSTART 6811; GFX900-NEXT: ; def v[2:3] 6812; GFX900-NEXT: ;;#ASMEND 6813; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 6814; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6815; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6816; GFX900-NEXT: s_waitcnt vmcnt(0) 6817; GFX900-NEXT: s_setpc_b64 s[30:31] 6818; 6819; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_6: 6820; GFX90A: ; %bb.0: 6821; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6822; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6823; GFX90A-NEXT: ;;#ASMSTART 6824; GFX90A-NEXT: ; def v[0:1] 6825; GFX90A-NEXT: ;;#ASMEND 6826; GFX90A-NEXT: ;;#ASMSTART 6827; GFX90A-NEXT: ; def v[2:3] 6828; GFX90A-NEXT: ;;#ASMEND 6829; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 6830; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6831; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6832; GFX90A-NEXT: s_waitcnt vmcnt(0) 6833; GFX90A-NEXT: s_setpc_b64 s[30:31] 6834; 6835; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_6: 6836; GFX940: ; %bb.0: 6837; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6838; GFX940-NEXT: v_mov_b32_e32 v4, 0 6839; GFX940-NEXT: ;;#ASMSTART 6840; GFX940-NEXT: ; def v[0:1] 6841; GFX940-NEXT: ;;#ASMEND 6842; GFX940-NEXT: ;;#ASMSTART 6843; GFX940-NEXT: ; def v[2:3] 6844; GFX940-NEXT: ;;#ASMEND 6845; GFX940-NEXT: s_nop 0 6846; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 6847; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6848; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6849; GFX940-NEXT: s_waitcnt vmcnt(0) 6850; GFX940-NEXT: s_setpc_b64 s[30:31] 6851 %vec0 = call <4 x half> asm "; def $0", "=v"() 6852 %vec1 = call <4 x half> asm "; def $0", "=v"() 6853 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 6854 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6855 ret void 6856} 6857 6858define void @v_shuffle_v3f16_v4f16__7_3_6(ptr addrspace(1) inreg %ptr) { 6859; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_6: 6860; GFX900: ; %bb.0: 6861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6862; GFX900-NEXT: v_mov_b32_e32 v4, 0 6863; GFX900-NEXT: ;;#ASMSTART 6864; GFX900-NEXT: ; def v[0:1] 6865; GFX900-NEXT: ;;#ASMEND 6866; GFX900-NEXT: ;;#ASMSTART 6867; GFX900-NEXT: ; def v[2:3] 6868; GFX900-NEXT: ;;#ASMEND 6869; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6870; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 6871; GFX900-NEXT: global_store_short v4, v3, s[16:17] offset:4 6872; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 6873; GFX900-NEXT: s_waitcnt vmcnt(0) 6874; GFX900-NEXT: s_setpc_b64 s[30:31] 6875; 6876; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_6: 6877; GFX90A: ; %bb.0: 6878; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6879; GFX90A-NEXT: v_mov_b32_e32 v4, 0 6880; GFX90A-NEXT: ;;#ASMSTART 6881; GFX90A-NEXT: ; def v[0:1] 6882; GFX90A-NEXT: ;;#ASMEND 6883; GFX90A-NEXT: ;;#ASMSTART 6884; GFX90A-NEXT: ; def v[2:3] 6885; GFX90A-NEXT: ;;#ASMEND 6886; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6887; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 6888; GFX90A-NEXT: global_store_short v4, v3, s[16:17] offset:4 6889; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 6890; GFX90A-NEXT: s_waitcnt vmcnt(0) 6891; GFX90A-NEXT: s_setpc_b64 s[30:31] 6892; 6893; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_6: 6894; GFX940: ; %bb.0: 6895; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6896; GFX940-NEXT: v_mov_b32_e32 v4, 0 6897; GFX940-NEXT: ;;#ASMSTART 6898; GFX940-NEXT: ; def v[0:1] 6899; GFX940-NEXT: ;;#ASMEND 6900; GFX940-NEXT: ;;#ASMSTART 6901; GFX940-NEXT: ; def v[2:3] 6902; GFX940-NEXT: ;;#ASMEND 6903; GFX940-NEXT: s_mov_b32 s2, 0x7060302 6904; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 6905; GFX940-NEXT: global_store_short v4, v3, s[0:1] offset:4 sc0 sc1 6906; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 6907; GFX940-NEXT: s_waitcnt vmcnt(0) 6908; GFX940-NEXT: s_setpc_b64 s[30:31] 6909 %vec0 = call <4 x half> asm "; def $0", "=v"() 6910 %vec1 = call <4 x half> asm "; def $0", "=v"() 6911 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 6912 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6913 ret void 6914} 6915 6916define void @v_shuffle_v3f16_v4f16__7_4_6(ptr addrspace(1) inreg %ptr) { 6917; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_6: 6918; GFX900: ; %bb.0: 6919; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6920; GFX900-NEXT: v_mov_b32_e32 v2, 0 6921; GFX900-NEXT: ;;#ASMSTART 6922; GFX900-NEXT: ; def v[0:1] 6923; GFX900-NEXT: ;;#ASMEND 6924; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 6925; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6926; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6927; GFX900-NEXT: s_waitcnt vmcnt(0) 6928; GFX900-NEXT: s_setpc_b64 s[30:31] 6929; 6930; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_6: 6931; GFX90A: ; %bb.0: 6932; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6933; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6934; GFX90A-NEXT: ;;#ASMSTART 6935; GFX90A-NEXT: ; def v[0:1] 6936; GFX90A-NEXT: ;;#ASMEND 6937; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 6938; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6939; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6940; GFX90A-NEXT: s_waitcnt vmcnt(0) 6941; GFX90A-NEXT: s_setpc_b64 s[30:31] 6942; 6943; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_6: 6944; GFX940: ; %bb.0: 6945; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6946; GFX940-NEXT: v_mov_b32_e32 v2, 0 6947; GFX940-NEXT: ;;#ASMSTART 6948; GFX940-NEXT: ; def v[0:1] 6949; GFX940-NEXT: ;;#ASMEND 6950; GFX940-NEXT: s_nop 0 6951; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 6952; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 6953; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 6954; GFX940-NEXT: s_waitcnt vmcnt(0) 6955; GFX940-NEXT: s_setpc_b64 s[30:31] 6956 %vec0 = call <4 x half> asm "; def $0", "=v"() 6957 %vec1 = call <4 x half> asm "; def $0", "=v"() 6958 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 6959 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 6960 ret void 6961} 6962 6963define void @v_shuffle_v3f16_v4f16__7_5_6(ptr addrspace(1) inreg %ptr) { 6964; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_6: 6965; GFX900: ; %bb.0: 6966; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6967; GFX900-NEXT: v_mov_b32_e32 v2, 0 6968; GFX900-NEXT: ;;#ASMSTART 6969; GFX900-NEXT: ; def v[0:1] 6970; GFX900-NEXT: ;;#ASMEND 6971; GFX900-NEXT: s_mov_b32 s4, 0x7060302 6972; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 6973; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 6974; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 6975; GFX900-NEXT: s_waitcnt vmcnt(0) 6976; GFX900-NEXT: s_setpc_b64 s[30:31] 6977; 6978; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_6: 6979; GFX90A: ; %bb.0: 6980; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6981; GFX90A-NEXT: v_mov_b32_e32 v2, 0 6982; GFX90A-NEXT: ;;#ASMSTART 6983; GFX90A-NEXT: ; def v[0:1] 6984; GFX90A-NEXT: ;;#ASMEND 6985; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 6986; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 6987; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 6988; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 6989; GFX90A-NEXT: s_waitcnt vmcnt(0) 6990; GFX90A-NEXT: s_setpc_b64 s[30:31] 6991; 6992; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_6: 6993; GFX940: ; %bb.0: 6994; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6995; GFX940-NEXT: v_mov_b32_e32 v2, 0 6996; GFX940-NEXT: ;;#ASMSTART 6997; GFX940-NEXT: ; def v[0:1] 6998; GFX940-NEXT: ;;#ASMEND 6999; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7000; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 7001; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7002; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7003; GFX940-NEXT: s_waitcnt vmcnt(0) 7004; GFX940-NEXT: s_setpc_b64 s[30:31] 7005 %vec0 = call <4 x half> asm "; def $0", "=v"() 7006 %vec1 = call <4 x half> asm "; def $0", "=v"() 7007 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 7008 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7009 ret void 7010} 7011 7012define void @v_shuffle_v3f16_v4f16__u_7_7(ptr addrspace(1) inreg %ptr) { 7013; GFX900-LABEL: v_shuffle_v3f16_v4f16__u_7_7: 7014; GFX900: ; %bb.0: 7015; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7016; GFX900-NEXT: ;;#ASMSTART 7017; GFX900-NEXT: ; def v[0:1] 7018; GFX900-NEXT: ;;#ASMEND 7019; GFX900-NEXT: s_mov_b32 s4, 0xffff 7020; GFX900-NEXT: v_mov_b32_e32 v2, 0 7021; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 7022; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7023; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7024; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7025; GFX900-NEXT: s_waitcnt vmcnt(0) 7026; GFX900-NEXT: s_setpc_b64 s[30:31] 7027; 7028; GFX90A-LABEL: v_shuffle_v3f16_v4f16__u_7_7: 7029; GFX90A: ; %bb.0: 7030; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7031; GFX90A-NEXT: ;;#ASMSTART 7032; GFX90A-NEXT: ; def v[0:1] 7033; GFX90A-NEXT: ;;#ASMEND 7034; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7035; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7036; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 7037; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7038; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7039; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7040; GFX90A-NEXT: s_waitcnt vmcnt(0) 7041; GFX90A-NEXT: s_setpc_b64 s[30:31] 7042; 7043; GFX940-LABEL: v_shuffle_v3f16_v4f16__u_7_7: 7044; GFX940: ; %bb.0: 7045; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7046; GFX940-NEXT: ;;#ASMSTART 7047; GFX940-NEXT: ; def v[0:1] 7048; GFX940-NEXT: ;;#ASMEND 7049; GFX940-NEXT: s_mov_b32 s2, 0xffff 7050; GFX940-NEXT: v_mov_b32_e32 v2, 0 7051; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 7052; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7053; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7054; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7055; GFX940-NEXT: s_waitcnt vmcnt(0) 7056; GFX940-NEXT: s_setpc_b64 s[30:31] 7057 %vec0 = call <4 x half> asm "; def $0", "=v"() 7058 %vec1 = call <4 x half> asm "; def $0", "=v"() 7059 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 7, i32 7> 7060 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7061 ret void 7062} 7063 7064define void @v_shuffle_v3f16_v4f16__0_7_7(ptr addrspace(1) inreg %ptr) { 7065; GFX900-LABEL: v_shuffle_v3f16_v4f16__0_7_7: 7066; GFX900: ; %bb.0: 7067; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7068; GFX900-NEXT: ;;#ASMSTART 7069; GFX900-NEXT: ; def v[0:1] 7070; GFX900-NEXT: ;;#ASMEND 7071; GFX900-NEXT: s_mov_b32 s4, 0xffff 7072; GFX900-NEXT: v_mov_b32_e32 v3, 0 7073; GFX900-NEXT: ;;#ASMSTART 7074; GFX900-NEXT: ; def v[1:2] 7075; GFX900-NEXT: ;;#ASMEND 7076; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 7077; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7078; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7079; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7080; GFX900-NEXT: s_waitcnt vmcnt(0) 7081; GFX900-NEXT: s_setpc_b64 s[30:31] 7082; 7083; GFX90A-LABEL: v_shuffle_v3f16_v4f16__0_7_7: 7084; GFX90A: ; %bb.0: 7085; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7086; GFX90A-NEXT: ;;#ASMSTART 7087; GFX90A-NEXT: ; def v[0:1] 7088; GFX90A-NEXT: ;;#ASMEND 7089; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7090; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7091; GFX90A-NEXT: ;;#ASMSTART 7092; GFX90A-NEXT: ; def v[2:3] 7093; GFX90A-NEXT: ;;#ASMEND 7094; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 7095; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7096; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7097; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7098; GFX90A-NEXT: s_waitcnt vmcnt(0) 7099; GFX90A-NEXT: s_setpc_b64 s[30:31] 7100; 7101; GFX940-LABEL: v_shuffle_v3f16_v4f16__0_7_7: 7102; GFX940: ; %bb.0: 7103; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7104; GFX940-NEXT: ;;#ASMSTART 7105; GFX940-NEXT: ; def v[0:1] 7106; GFX940-NEXT: ;;#ASMEND 7107; GFX940-NEXT: s_mov_b32 s2, 0xffff 7108; GFX940-NEXT: v_mov_b32_e32 v4, 0 7109; GFX940-NEXT: ;;#ASMSTART 7110; GFX940-NEXT: ; def v[2:3] 7111; GFX940-NEXT: ;;#ASMEND 7112; GFX940-NEXT: s_nop 0 7113; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 7114; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7115; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7116; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7117; GFX940-NEXT: s_waitcnt vmcnt(0) 7118; GFX940-NEXT: s_setpc_b64 s[30:31] 7119 %vec0 = call <4 x half> asm "; def $0", "=v"() 7120 %vec1 = call <4 x half> asm "; def $0", "=v"() 7121 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 7122 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7123 ret void 7124} 7125 7126define void @v_shuffle_v3f16_v4f16__1_7_7(ptr addrspace(1) inreg %ptr) { 7127; GFX900-LABEL: v_shuffle_v3f16_v4f16__1_7_7: 7128; GFX900: ; %bb.0: 7129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7130; GFX900-NEXT: ;;#ASMSTART 7131; GFX900-NEXT: ; def v[0:1] 7132; GFX900-NEXT: ;;#ASMEND 7133; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7134; GFX900-NEXT: v_mov_b32_e32 v3, 0 7135; GFX900-NEXT: ;;#ASMSTART 7136; GFX900-NEXT: ; def v[1:2] 7137; GFX900-NEXT: ;;#ASMEND 7138; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 7139; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7140; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7141; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7142; GFX900-NEXT: s_waitcnt vmcnt(0) 7143; GFX900-NEXT: s_setpc_b64 s[30:31] 7144; 7145; GFX90A-LABEL: v_shuffle_v3f16_v4f16__1_7_7: 7146; GFX90A: ; %bb.0: 7147; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7148; GFX90A-NEXT: ;;#ASMSTART 7149; GFX90A-NEXT: ; def v[0:1] 7150; GFX90A-NEXT: ;;#ASMEND 7151; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7152; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7153; GFX90A-NEXT: ;;#ASMSTART 7154; GFX90A-NEXT: ; def v[2:3] 7155; GFX90A-NEXT: ;;#ASMEND 7156; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 7157; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7158; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7159; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7160; GFX90A-NEXT: s_waitcnt vmcnt(0) 7161; GFX90A-NEXT: s_setpc_b64 s[30:31] 7162; 7163; GFX940-LABEL: v_shuffle_v3f16_v4f16__1_7_7: 7164; GFX940: ; %bb.0: 7165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7166; GFX940-NEXT: ;;#ASMSTART 7167; GFX940-NEXT: ; def v[0:1] 7168; GFX940-NEXT: ;;#ASMEND 7169; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7170; GFX940-NEXT: v_mov_b32_e32 v4, 0 7171; GFX940-NEXT: ;;#ASMSTART 7172; GFX940-NEXT: ; def v[2:3] 7173; GFX940-NEXT: ;;#ASMEND 7174; GFX940-NEXT: s_nop 0 7175; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 7176; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7177; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7178; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7179; GFX940-NEXT: s_waitcnt vmcnt(0) 7180; GFX940-NEXT: s_setpc_b64 s[30:31] 7181 %vec0 = call <4 x half> asm "; def $0", "=v"() 7182 %vec1 = call <4 x half> asm "; def $0", "=v"() 7183 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 7184 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7185 ret void 7186} 7187 7188define void @v_shuffle_v3f16_v4f16__2_7_7(ptr addrspace(1) inreg %ptr) { 7189; GFX900-LABEL: v_shuffle_v3f16_v4f16__2_7_7: 7190; GFX900: ; %bb.0: 7191; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7192; GFX900-NEXT: ;;#ASMSTART 7193; GFX900-NEXT: ; def v[0:1] 7194; GFX900-NEXT: ;;#ASMEND 7195; GFX900-NEXT: s_mov_b32 s4, 0xffff 7196; GFX900-NEXT: v_mov_b32_e32 v4, 0 7197; GFX900-NEXT: ;;#ASMSTART 7198; GFX900-NEXT: ; def v[2:3] 7199; GFX900-NEXT: ;;#ASMEND 7200; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v3 7201; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7202; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7203; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7204; GFX900-NEXT: s_waitcnt vmcnt(0) 7205; GFX900-NEXT: s_setpc_b64 s[30:31] 7206; 7207; GFX90A-LABEL: v_shuffle_v3f16_v4f16__2_7_7: 7208; GFX90A: ; %bb.0: 7209; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7210; GFX90A-NEXT: ;;#ASMSTART 7211; GFX90A-NEXT: ; def v[0:1] 7212; GFX90A-NEXT: ;;#ASMEND 7213; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7214; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7215; GFX90A-NEXT: ;;#ASMSTART 7216; GFX90A-NEXT: ; def v[2:3] 7217; GFX90A-NEXT: ;;#ASMEND 7218; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v3 7219; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7220; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7221; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7222; GFX90A-NEXT: s_waitcnt vmcnt(0) 7223; GFX90A-NEXT: s_setpc_b64 s[30:31] 7224; 7225; GFX940-LABEL: v_shuffle_v3f16_v4f16__2_7_7: 7226; GFX940: ; %bb.0: 7227; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7228; GFX940-NEXT: ;;#ASMSTART 7229; GFX940-NEXT: ; def v[0:1] 7230; GFX940-NEXT: ;;#ASMEND 7231; GFX940-NEXT: s_mov_b32 s2, 0xffff 7232; GFX940-NEXT: v_mov_b32_e32 v4, 0 7233; GFX940-NEXT: ;;#ASMSTART 7234; GFX940-NEXT: ; def v[2:3] 7235; GFX940-NEXT: ;;#ASMEND 7236; GFX940-NEXT: s_nop 0 7237; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v3 7238; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7239; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7240; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7241; GFX940-NEXT: s_waitcnt vmcnt(0) 7242; GFX940-NEXT: s_setpc_b64 s[30:31] 7243 %vec0 = call <4 x half> asm "; def $0", "=v"() 7244 %vec1 = call <4 x half> asm "; def $0", "=v"() 7245 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 7, i32 7> 7246 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7247 ret void 7248} 7249 7250define void @v_shuffle_v3f16_v4f16__3_7_7(ptr addrspace(1) inreg %ptr) { 7251; GFX900-LABEL: v_shuffle_v3f16_v4f16__3_7_7: 7252; GFX900: ; %bb.0: 7253; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7254; GFX900-NEXT: ;;#ASMSTART 7255; GFX900-NEXT: ; def v[0:1] 7256; GFX900-NEXT: ;;#ASMEND 7257; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7258; GFX900-NEXT: v_mov_b32_e32 v4, 0 7259; GFX900-NEXT: ;;#ASMSTART 7260; GFX900-NEXT: ; def v[2:3] 7261; GFX900-NEXT: ;;#ASMEND 7262; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 7263; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7264; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7265; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7266; GFX900-NEXT: s_waitcnt vmcnt(0) 7267; GFX900-NEXT: s_setpc_b64 s[30:31] 7268; 7269; GFX90A-LABEL: v_shuffle_v3f16_v4f16__3_7_7: 7270; GFX90A: ; %bb.0: 7271; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7272; GFX90A-NEXT: ;;#ASMSTART 7273; GFX90A-NEXT: ; def v[0:1] 7274; GFX90A-NEXT: ;;#ASMEND 7275; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7276; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7277; GFX90A-NEXT: ;;#ASMSTART 7278; GFX90A-NEXT: ; def v[2:3] 7279; GFX90A-NEXT: ;;#ASMEND 7280; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 7281; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7282; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7283; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7284; GFX90A-NEXT: s_waitcnt vmcnt(0) 7285; GFX90A-NEXT: s_setpc_b64 s[30:31] 7286; 7287; GFX940-LABEL: v_shuffle_v3f16_v4f16__3_7_7: 7288; GFX940: ; %bb.0: 7289; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7290; GFX940-NEXT: ;;#ASMSTART 7291; GFX940-NEXT: ; def v[0:1] 7292; GFX940-NEXT: ;;#ASMEND 7293; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7294; GFX940-NEXT: v_mov_b32_e32 v4, 0 7295; GFX940-NEXT: ;;#ASMSTART 7296; GFX940-NEXT: ; def v[2:3] 7297; GFX940-NEXT: ;;#ASMEND 7298; GFX940-NEXT: s_nop 0 7299; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 7300; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7301; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7302; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7303; GFX940-NEXT: s_waitcnt vmcnt(0) 7304; GFX940-NEXT: s_setpc_b64 s[30:31] 7305 %vec0 = call <4 x half> asm "; def $0", "=v"() 7306 %vec1 = call <4 x half> asm "; def $0", "=v"() 7307 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 7, i32 7> 7308 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7309 ret void 7310} 7311 7312define void @v_shuffle_v3f16_v4f16__4_7_7(ptr addrspace(1) inreg %ptr) { 7313; GFX900-LABEL: v_shuffle_v3f16_v4f16__4_7_7: 7314; GFX900: ; %bb.0: 7315; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7316; GFX900-NEXT: ;;#ASMSTART 7317; GFX900-NEXT: ; def v[0:1] 7318; GFX900-NEXT: ;;#ASMEND 7319; GFX900-NEXT: s_mov_b32 s4, 0xffff 7320; GFX900-NEXT: v_mov_b32_e32 v2, 0 7321; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 7322; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7323; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7324; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7325; GFX900-NEXT: s_waitcnt vmcnt(0) 7326; GFX900-NEXT: s_setpc_b64 s[30:31] 7327; 7328; GFX90A-LABEL: v_shuffle_v3f16_v4f16__4_7_7: 7329; GFX90A: ; %bb.0: 7330; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7331; GFX90A-NEXT: ;;#ASMSTART 7332; GFX90A-NEXT: ; def v[0:1] 7333; GFX90A-NEXT: ;;#ASMEND 7334; GFX90A-NEXT: s_mov_b32 s4, 0xffff 7335; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7336; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 7337; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7338; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7339; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7340; GFX90A-NEXT: s_waitcnt vmcnt(0) 7341; GFX90A-NEXT: s_setpc_b64 s[30:31] 7342; 7343; GFX940-LABEL: v_shuffle_v3f16_v4f16__4_7_7: 7344; GFX940: ; %bb.0: 7345; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7346; GFX940-NEXT: ;;#ASMSTART 7347; GFX940-NEXT: ; def v[0:1] 7348; GFX940-NEXT: ;;#ASMEND 7349; GFX940-NEXT: s_mov_b32 s2, 0xffff 7350; GFX940-NEXT: v_mov_b32_e32 v2, 0 7351; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 7352; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7353; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7354; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7355; GFX940-NEXT: s_waitcnt vmcnt(0) 7356; GFX940-NEXT: s_setpc_b64 s[30:31] 7357 %vec0 = call <4 x half> asm "; def $0", "=v"() 7358 %vec1 = call <4 x half> asm "; def $0", "=v"() 7359 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 7, i32 7> 7360 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7361 ret void 7362} 7363 7364define void @v_shuffle_v3f16_v4f16__5_7_7(ptr addrspace(1) inreg %ptr) { 7365; GFX900-LABEL: v_shuffle_v3f16_v4f16__5_7_7: 7366; GFX900: ; %bb.0: 7367; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7368; GFX900-NEXT: ;;#ASMSTART 7369; GFX900-NEXT: ; def v[0:1] 7370; GFX900-NEXT: ;;#ASMEND 7371; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7372; GFX900-NEXT: v_mov_b32_e32 v2, 0 7373; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 7374; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7375; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7376; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7377; GFX900-NEXT: s_waitcnt vmcnt(0) 7378; GFX900-NEXT: s_setpc_b64 s[30:31] 7379; 7380; GFX90A-LABEL: v_shuffle_v3f16_v4f16__5_7_7: 7381; GFX90A: ; %bb.0: 7382; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7383; GFX90A-NEXT: ;;#ASMSTART 7384; GFX90A-NEXT: ; def v[0:1] 7385; GFX90A-NEXT: ;;#ASMEND 7386; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7387; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7388; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 7389; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7390; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7391; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7392; GFX90A-NEXT: s_waitcnt vmcnt(0) 7393; GFX90A-NEXT: s_setpc_b64 s[30:31] 7394; 7395; GFX940-LABEL: v_shuffle_v3f16_v4f16__5_7_7: 7396; GFX940: ; %bb.0: 7397; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7398; GFX940-NEXT: ;;#ASMSTART 7399; GFX940-NEXT: ; def v[0:1] 7400; GFX940-NEXT: ;;#ASMEND 7401; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7402; GFX940-NEXT: v_mov_b32_e32 v2, 0 7403; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 7404; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7405; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7406; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7407; GFX940-NEXT: s_waitcnt vmcnt(0) 7408; GFX940-NEXT: s_setpc_b64 s[30:31] 7409 %vec0 = call <4 x half> asm "; def $0", "=v"() 7410 %vec1 = call <4 x half> asm "; def $0", "=v"() 7411 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 7, i32 7> 7412 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7413 ret void 7414} 7415 7416define void @v_shuffle_v3f16_v4f16__6_7_7(ptr addrspace(1) inreg %ptr) { 7417; GFX900-LABEL: v_shuffle_v3f16_v4f16__6_7_7: 7418; GFX900: ; %bb.0: 7419; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7420; GFX900-NEXT: v_mov_b32_e32 v2, 0 7421; GFX900-NEXT: ;;#ASMSTART 7422; GFX900-NEXT: ; def v[0:1] 7423; GFX900-NEXT: ;;#ASMEND 7424; GFX900-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 7425; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 7426; GFX900-NEXT: s_waitcnt vmcnt(0) 7427; GFX900-NEXT: s_setpc_b64 s[30:31] 7428; 7429; GFX90A-LABEL: v_shuffle_v3f16_v4f16__6_7_7: 7430; GFX90A: ; %bb.0: 7431; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7432; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7433; GFX90A-NEXT: ;;#ASMSTART 7434; GFX90A-NEXT: ; def v[0:1] 7435; GFX90A-NEXT: ;;#ASMEND 7436; GFX90A-NEXT: global_store_short_d16_hi v2, v1, s[16:17] offset:4 7437; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 7438; GFX90A-NEXT: s_waitcnt vmcnt(0) 7439; GFX90A-NEXT: s_setpc_b64 s[30:31] 7440; 7441; GFX940-LABEL: v_shuffle_v3f16_v4f16__6_7_7: 7442; GFX940: ; %bb.0: 7443; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7444; GFX940-NEXT: v_mov_b32_e32 v2, 0 7445; GFX940-NEXT: ;;#ASMSTART 7446; GFX940-NEXT: ; def v[0:1] 7447; GFX940-NEXT: ;;#ASMEND 7448; GFX940-NEXT: global_store_short_d16_hi v2, v1, s[0:1] offset:4 sc0 sc1 7449; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 7450; GFX940-NEXT: s_waitcnt vmcnt(0) 7451; GFX940-NEXT: s_setpc_b64 s[30:31] 7452 %vec0 = call <4 x half> asm "; def $0", "=v"() 7453 %vec1 = call <4 x half> asm "; def $0", "=v"() 7454 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 7, i32 7> 7455 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7456 ret void 7457} 7458 7459define void @v_shuffle_v3f16_v4f16__7_u_7(ptr addrspace(1) inreg %ptr) { 7460; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_u_7: 7461; GFX900: ; %bb.0: 7462; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7463; GFX900-NEXT: ;;#ASMSTART 7464; GFX900-NEXT: ; def v[0:1] 7465; GFX900-NEXT: ;;#ASMEND 7466; GFX900-NEXT: v_mov_b32_e32 v2, 0 7467; GFX900-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7468; GFX900-NEXT: v_alignbit_b32 v1, s4, v1, 16 7469; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 7470; GFX900-NEXT: global_store_short v2, v0, s[16:17] offset:4 7471; GFX900-NEXT: s_waitcnt vmcnt(0) 7472; GFX900-NEXT: s_setpc_b64 s[30:31] 7473; 7474; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_u_7: 7475; GFX90A: ; %bb.0: 7476; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7477; GFX90A-NEXT: ;;#ASMSTART 7478; GFX90A-NEXT: ; def v[0:1] 7479; GFX90A-NEXT: ;;#ASMEND 7480; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7481; GFX90A-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7482; GFX90A-NEXT: v_alignbit_b32 v1, s4, v1, 16 7483; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 7484; GFX90A-NEXT: global_store_short v2, v0, s[16:17] offset:4 7485; GFX90A-NEXT: s_waitcnt vmcnt(0) 7486; GFX90A-NEXT: s_setpc_b64 s[30:31] 7487; 7488; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_u_7: 7489; GFX940: ; %bb.0: 7490; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7491; GFX940-NEXT: ;;#ASMSTART 7492; GFX940-NEXT: ; def v[0:1] 7493; GFX940-NEXT: ;;#ASMEND 7494; GFX940-NEXT: v_mov_b32_e32 v2, 0 7495; GFX940-NEXT: v_lshrrev_b32_e32 v0, 16, v1 7496; GFX940-NEXT: v_alignbit_b32 v1, s0, v1, 16 7497; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 7498; GFX940-NEXT: global_store_short v2, v0, s[0:1] offset:4 sc0 sc1 7499; GFX940-NEXT: s_waitcnt vmcnt(0) 7500; GFX940-NEXT: s_setpc_b64 s[30:31] 7501 %vec0 = call <4 x half> asm "; def $0", "=v"() 7502 %vec1 = call <4 x half> asm "; def $0", "=v"() 7503 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 7> 7504 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7505 ret void 7506} 7507 7508define void @v_shuffle_v3f16_v4f16__7_0_7(ptr addrspace(1) inreg %ptr) { 7509; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_0_7: 7510; GFX900: ; %bb.0: 7511; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7512; GFX900-NEXT: ;;#ASMSTART 7513; GFX900-NEXT: ; def v[0:1] 7514; GFX900-NEXT: ;;#ASMEND 7515; GFX900-NEXT: v_mov_b32_e32 v3, 0 7516; GFX900-NEXT: ;;#ASMSTART 7517; GFX900-NEXT: ; def v[1:2] 7518; GFX900-NEXT: ;;#ASMEND 7519; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 7520; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7521; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7522; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7523; GFX900-NEXT: s_waitcnt vmcnt(0) 7524; GFX900-NEXT: s_setpc_b64 s[30:31] 7525; 7526; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_0_7: 7527; GFX90A: ; %bb.0: 7528; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7529; GFX90A-NEXT: ;;#ASMSTART 7530; GFX90A-NEXT: ; def v[0:1] 7531; GFX90A-NEXT: ;;#ASMEND 7532; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7533; GFX90A-NEXT: ;;#ASMSTART 7534; GFX90A-NEXT: ; def v[2:3] 7535; GFX90A-NEXT: ;;#ASMEND 7536; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 7537; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7538; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7539; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7540; GFX90A-NEXT: s_waitcnt vmcnt(0) 7541; GFX90A-NEXT: s_setpc_b64 s[30:31] 7542; 7543; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_0_7: 7544; GFX940: ; %bb.0: 7545; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7546; GFX940-NEXT: ;;#ASMSTART 7547; GFX940-NEXT: ; def v[0:1] 7548; GFX940-NEXT: ;;#ASMEND 7549; GFX940-NEXT: v_mov_b32_e32 v4, 0 7550; GFX940-NEXT: ;;#ASMSTART 7551; GFX940-NEXT: ; def v[2:3] 7552; GFX940-NEXT: ;;#ASMEND 7553; GFX940-NEXT: s_nop 0 7554; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 7555; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7556; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7557; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7558; GFX940-NEXT: s_waitcnt vmcnt(0) 7559; GFX940-NEXT: s_setpc_b64 s[30:31] 7560 %vec0 = call <4 x half> asm "; def $0", "=v"() 7561 %vec1 = call <4 x half> asm "; def $0", "=v"() 7562 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 7> 7563 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7564 ret void 7565} 7566 7567define void @v_shuffle_v3f16_v4f16__7_1_7(ptr addrspace(1) inreg %ptr) { 7568; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_1_7: 7569; GFX900: ; %bb.0: 7570; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7571; GFX900-NEXT: ;;#ASMSTART 7572; GFX900-NEXT: ; def v[0:1] 7573; GFX900-NEXT: ;;#ASMEND 7574; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7575; GFX900-NEXT: v_mov_b32_e32 v3, 0 7576; GFX900-NEXT: ;;#ASMSTART 7577; GFX900-NEXT: ; def v[1:2] 7578; GFX900-NEXT: ;;#ASMEND 7579; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 7580; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v2 7581; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 7582; GFX900-NEXT: global_store_short v3, v1, s[16:17] offset:4 7583; GFX900-NEXT: s_waitcnt vmcnt(0) 7584; GFX900-NEXT: s_setpc_b64 s[30:31] 7585; 7586; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_1_7: 7587; GFX90A: ; %bb.0: 7588; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7589; GFX90A-NEXT: ;;#ASMSTART 7590; GFX90A-NEXT: ; def v[0:1] 7591; GFX90A-NEXT: ;;#ASMEND 7592; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7593; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7594; GFX90A-NEXT: ;;#ASMSTART 7595; GFX90A-NEXT: ; def v[2:3] 7596; GFX90A-NEXT: ;;#ASMEND 7597; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 7598; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7599; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7600; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7601; GFX90A-NEXT: s_waitcnt vmcnt(0) 7602; GFX90A-NEXT: s_setpc_b64 s[30:31] 7603; 7604; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_1_7: 7605; GFX940: ; %bb.0: 7606; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7607; GFX940-NEXT: ;;#ASMSTART 7608; GFX940-NEXT: ; def v[0:1] 7609; GFX940-NEXT: ;;#ASMEND 7610; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7611; GFX940-NEXT: v_mov_b32_e32 v4, 0 7612; GFX940-NEXT: ;;#ASMSTART 7613; GFX940-NEXT: ; def v[2:3] 7614; GFX940-NEXT: ;;#ASMEND 7615; GFX940-NEXT: s_nop 0 7616; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 7617; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7618; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7619; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7620; GFX940-NEXT: s_waitcnt vmcnt(0) 7621; GFX940-NEXT: s_setpc_b64 s[30:31] 7622 %vec0 = call <4 x half> asm "; def $0", "=v"() 7623 %vec1 = call <4 x half> asm "; def $0", "=v"() 7624 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 7> 7625 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7626 ret void 7627} 7628 7629define void @v_shuffle_v3f16_v4f16__7_2_7(ptr addrspace(1) inreg %ptr) { 7630; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_2_7: 7631; GFX900: ; %bb.0: 7632; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7633; GFX900-NEXT: ;;#ASMSTART 7634; GFX900-NEXT: ; def v[0:1] 7635; GFX900-NEXT: ;;#ASMEND 7636; GFX900-NEXT: v_mov_b32_e32 v4, 0 7637; GFX900-NEXT: ;;#ASMSTART 7638; GFX900-NEXT: ; def v[2:3] 7639; GFX900-NEXT: ;;#ASMEND 7640; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 7641; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7642; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7643; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7644; GFX900-NEXT: s_waitcnt vmcnt(0) 7645; GFX900-NEXT: s_setpc_b64 s[30:31] 7646; 7647; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_2_7: 7648; GFX90A: ; %bb.0: 7649; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7650; GFX90A-NEXT: ;;#ASMSTART 7651; GFX90A-NEXT: ; def v[0:1] 7652; GFX90A-NEXT: ;;#ASMEND 7653; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7654; GFX90A-NEXT: ;;#ASMSTART 7655; GFX90A-NEXT: ; def v[2:3] 7656; GFX90A-NEXT: ;;#ASMEND 7657; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 7658; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7659; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7660; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7661; GFX90A-NEXT: s_waitcnt vmcnt(0) 7662; GFX90A-NEXT: s_setpc_b64 s[30:31] 7663; 7664; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_2_7: 7665; GFX940: ; %bb.0: 7666; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7667; GFX940-NEXT: ;;#ASMSTART 7668; GFX940-NEXT: ; def v[0:1] 7669; GFX940-NEXT: ;;#ASMEND 7670; GFX940-NEXT: v_mov_b32_e32 v4, 0 7671; GFX940-NEXT: ;;#ASMSTART 7672; GFX940-NEXT: ; def v[2:3] 7673; GFX940-NEXT: ;;#ASMEND 7674; GFX940-NEXT: s_nop 0 7675; GFX940-NEXT: v_alignbit_b32 v0, v1, v3, 16 7676; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7677; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7678; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7679; GFX940-NEXT: s_waitcnt vmcnt(0) 7680; GFX940-NEXT: s_setpc_b64 s[30:31] 7681 %vec0 = call <4 x half> asm "; def $0", "=v"() 7682 %vec1 = call <4 x half> asm "; def $0", "=v"() 7683 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 7> 7684 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7685 ret void 7686} 7687 7688define void @v_shuffle_v3f16_v4f16__7_3_7(ptr addrspace(1) inreg %ptr) { 7689; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_3_7: 7690; GFX900: ; %bb.0: 7691; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7692; GFX900-NEXT: ;;#ASMSTART 7693; GFX900-NEXT: ; def v[0:1] 7694; GFX900-NEXT: ;;#ASMEND 7695; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7696; GFX900-NEXT: v_mov_b32_e32 v4, 0 7697; GFX900-NEXT: ;;#ASMSTART 7698; GFX900-NEXT: ; def v[2:3] 7699; GFX900-NEXT: ;;#ASMEND 7700; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 7701; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7702; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 7703; GFX900-NEXT: global_store_short v4, v1, s[16:17] offset:4 7704; GFX900-NEXT: s_waitcnt vmcnt(0) 7705; GFX900-NEXT: s_setpc_b64 s[30:31] 7706; 7707; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_3_7: 7708; GFX90A: ; %bb.0: 7709; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7710; GFX90A-NEXT: ;;#ASMSTART 7711; GFX90A-NEXT: ; def v[0:1] 7712; GFX90A-NEXT: ;;#ASMEND 7713; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7714; GFX90A-NEXT: v_mov_b32_e32 v4, 0 7715; GFX90A-NEXT: ;;#ASMSTART 7716; GFX90A-NEXT: ; def v[2:3] 7717; GFX90A-NEXT: ;;#ASMEND 7718; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 7719; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7720; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 7721; GFX90A-NEXT: global_store_short v4, v1, s[16:17] offset:4 7722; GFX90A-NEXT: s_waitcnt vmcnt(0) 7723; GFX90A-NEXT: s_setpc_b64 s[30:31] 7724; 7725; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_3_7: 7726; GFX940: ; %bb.0: 7727; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7728; GFX940-NEXT: ;;#ASMSTART 7729; GFX940-NEXT: ; def v[0:1] 7730; GFX940-NEXT: ;;#ASMEND 7731; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7732; GFX940-NEXT: v_mov_b32_e32 v4, 0 7733; GFX940-NEXT: ;;#ASMSTART 7734; GFX940-NEXT: ; def v[2:3] 7735; GFX940-NEXT: ;;#ASMEND 7736; GFX940-NEXT: s_nop 0 7737; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 7738; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v3 7739; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 7740; GFX940-NEXT: global_store_short v4, v1, s[0:1] offset:4 sc0 sc1 7741; GFX940-NEXT: s_waitcnt vmcnt(0) 7742; GFX940-NEXT: s_setpc_b64 s[30:31] 7743 %vec0 = call <4 x half> asm "; def $0", "=v"() 7744 %vec1 = call <4 x half> asm "; def $0", "=v"() 7745 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 7> 7746 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7747 ret void 7748} 7749 7750define void @v_shuffle_v3f16_v4f16__7_4_7(ptr addrspace(1) inreg %ptr) { 7751; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_4_7: 7752; GFX900: ; %bb.0: 7753; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7754; GFX900-NEXT: ;;#ASMSTART 7755; GFX900-NEXT: ; def v[0:1] 7756; GFX900-NEXT: ;;#ASMEND 7757; GFX900-NEXT: v_mov_b32_e32 v2, 0 7758; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 7759; GFX900-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7760; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7761; GFX900-NEXT: global_store_short v2, v3, s[16:17] offset:4 7762; GFX900-NEXT: s_waitcnt vmcnt(0) 7763; GFX900-NEXT: s_setpc_b64 s[30:31] 7764; 7765; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_4_7: 7766; GFX90A: ; %bb.0: 7767; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7768; GFX90A-NEXT: ;;#ASMSTART 7769; GFX90A-NEXT: ; def v[0:1] 7770; GFX90A-NEXT: ;;#ASMEND 7771; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7772; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 7773; GFX90A-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7774; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7775; GFX90A-NEXT: global_store_short v2, v3, s[16:17] offset:4 7776; GFX90A-NEXT: s_waitcnt vmcnt(0) 7777; GFX90A-NEXT: s_setpc_b64 s[30:31] 7778; 7779; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_4_7: 7780; GFX940: ; %bb.0: 7781; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7782; GFX940-NEXT: ;;#ASMSTART 7783; GFX940-NEXT: ; def v[0:1] 7784; GFX940-NEXT: ;;#ASMEND 7785; GFX940-NEXT: v_mov_b32_e32 v2, 0 7786; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 7787; GFX940-NEXT: v_lshrrev_b32_e32 v3, 16, v1 7788; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7789; GFX940-NEXT: global_store_short v2, v3, s[0:1] offset:4 sc0 sc1 7790; GFX940-NEXT: s_waitcnt vmcnt(0) 7791; GFX940-NEXT: s_setpc_b64 s[30:31] 7792 %vec0 = call <4 x half> asm "; def $0", "=v"() 7793 %vec1 = call <4 x half> asm "; def $0", "=v"() 7794 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 7> 7795 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7796 ret void 7797} 7798 7799define void @v_shuffle_v3f16_v4f16__7_5_7(ptr addrspace(1) inreg %ptr) { 7800; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_5_7: 7801; GFX900: ; %bb.0: 7802; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7803; GFX900-NEXT: ;;#ASMSTART 7804; GFX900-NEXT: ; def v[0:1] 7805; GFX900-NEXT: ;;#ASMEND 7806; GFX900-NEXT: s_mov_b32 s4, 0x7060302 7807; GFX900-NEXT: v_mov_b32_e32 v2, 0 7808; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 7809; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7810; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7811; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7812; GFX900-NEXT: s_waitcnt vmcnt(0) 7813; GFX900-NEXT: s_setpc_b64 s[30:31] 7814; 7815; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_5_7: 7816; GFX90A: ; %bb.0: 7817; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7818; GFX90A-NEXT: ;;#ASMSTART 7819; GFX90A-NEXT: ; def v[0:1] 7820; GFX90A-NEXT: ;;#ASMEND 7821; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 7822; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7823; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 7824; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7825; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7826; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7827; GFX90A-NEXT: s_waitcnt vmcnt(0) 7828; GFX90A-NEXT: s_setpc_b64 s[30:31] 7829; 7830; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_5_7: 7831; GFX940: ; %bb.0: 7832; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7833; GFX940-NEXT: ;;#ASMSTART 7834; GFX940-NEXT: ; def v[0:1] 7835; GFX940-NEXT: ;;#ASMEND 7836; GFX940-NEXT: s_mov_b32 s2, 0x7060302 7837; GFX940-NEXT: v_mov_b32_e32 v2, 0 7838; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 7839; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7840; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7841; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7842; GFX940-NEXT: s_waitcnt vmcnt(0) 7843; GFX940-NEXT: s_setpc_b64 s[30:31] 7844 %vec0 = call <4 x half> asm "; def $0", "=v"() 7845 %vec1 = call <4 x half> asm "; def $0", "=v"() 7846 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 7> 7847 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7848 ret void 7849} 7850 7851define void @v_shuffle_v3f16_v4f16__7_6_7(ptr addrspace(1) inreg %ptr) { 7852; GFX900-LABEL: v_shuffle_v3f16_v4f16__7_6_7: 7853; GFX900: ; %bb.0: 7854; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7855; GFX900-NEXT: ;;#ASMSTART 7856; GFX900-NEXT: ; def v[0:1] 7857; GFX900-NEXT: ;;#ASMEND 7858; GFX900-NEXT: v_mov_b32_e32 v2, 0 7859; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 7860; GFX900-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7861; GFX900-NEXT: global_store_short v2, v1, s[16:17] offset:4 7862; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 7863; GFX900-NEXT: s_waitcnt vmcnt(0) 7864; GFX900-NEXT: s_setpc_b64 s[30:31] 7865; 7866; GFX90A-LABEL: v_shuffle_v3f16_v4f16__7_6_7: 7867; GFX90A: ; %bb.0: 7868; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7869; GFX90A-NEXT: ;;#ASMSTART 7870; GFX90A-NEXT: ; def v[0:1] 7871; GFX90A-NEXT: ;;#ASMEND 7872; GFX90A-NEXT: v_mov_b32_e32 v2, 0 7873; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 7874; GFX90A-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7875; GFX90A-NEXT: global_store_short v2, v1, s[16:17] offset:4 7876; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 7877; GFX90A-NEXT: s_waitcnt vmcnt(0) 7878; GFX90A-NEXT: s_setpc_b64 s[30:31] 7879; 7880; GFX940-LABEL: v_shuffle_v3f16_v4f16__7_6_7: 7881; GFX940: ; %bb.0: 7882; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7883; GFX940-NEXT: ;;#ASMSTART 7884; GFX940-NEXT: ; def v[0:1] 7885; GFX940-NEXT: ;;#ASMEND 7886; GFX940-NEXT: v_mov_b32_e32 v2, 0 7887; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 7888; GFX940-NEXT: v_lshrrev_b32_e32 v1, 16, v1 7889; GFX940-NEXT: global_store_short v2, v1, s[0:1] offset:4 sc0 sc1 7890; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 7891; GFX940-NEXT: s_waitcnt vmcnt(0) 7892; GFX940-NEXT: s_setpc_b64 s[30:31] 7893 %vec0 = call <4 x half> asm "; def $0", "=v"() 7894 %vec1 = call <4 x half> asm "; def $0", "=v"() 7895 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 7> 7896 store <3 x half> %shuf, ptr addrspace(1) %ptr, align 8 7897 ret void 7898} 7899 7900define void @s_shuffle_v3f16_v4f16__u_u_u() { 7901; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_u_u: 7902; GFX9: ; %bb.0: 7903; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7904; GFX9-NEXT: ;;#ASMSTART 7905; GFX9-NEXT: ; use s[8:9] 7906; GFX9-NEXT: ;;#ASMEND 7907; GFX9-NEXT: s_setpc_b64 s[30:31] 7908 %vec0 = call <4 x half> asm "; def $0", "=s"() 7909 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> poison 7910 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7911 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 7912 ret void 7913} 7914 7915define void @s_shuffle_v3f16_v4f16__0_u_u() { 7916; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_u_u: 7917; GFX900: ; %bb.0: 7918; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7919; GFX900-NEXT: ;;#ASMSTART 7920; GFX900-NEXT: ; def s[8:9] 7921; GFX900-NEXT: ;;#ASMEND 7922; GFX900-NEXT: ;;#ASMSTART 7923; GFX900-NEXT: ; use s[8:9] 7924; GFX900-NEXT: ;;#ASMEND 7925; GFX900-NEXT: s_setpc_b64 s[30:31] 7926; 7927; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_u_u: 7928; GFX90A: ; %bb.0: 7929; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7930; GFX90A-NEXT: ;;#ASMSTART 7931; GFX90A-NEXT: ; def s[8:9] 7932; GFX90A-NEXT: ;;#ASMEND 7933; GFX90A-NEXT: ;;#ASMSTART 7934; GFX90A-NEXT: ; use s[8:9] 7935; GFX90A-NEXT: ;;#ASMEND 7936; GFX90A-NEXT: s_setpc_b64 s[30:31] 7937; 7938; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_u_u: 7939; GFX940: ; %bb.0: 7940; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7941; GFX940-NEXT: ;;#ASMSTART 7942; GFX940-NEXT: ; def s[8:9] 7943; GFX940-NEXT: ;;#ASMEND 7944; GFX940-NEXT: s_nop 0 7945; GFX940-NEXT: ;;#ASMSTART 7946; GFX940-NEXT: ; use s[8:9] 7947; GFX940-NEXT: ;;#ASMEND 7948; GFX940-NEXT: s_setpc_b64 s[30:31] 7949 %vec0 = call <4 x half> asm "; def $0", "=s"() 7950 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 poison, i32 poison> 7951 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7952 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 7953 ret void 7954} 7955 7956define void @s_shuffle_v3f16_v4f16__1_u_u() { 7957; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_u_u: 7958; GFX900: ; %bb.0: 7959; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7960; GFX900-NEXT: ;;#ASMSTART 7961; GFX900-NEXT: ; def s[4:5] 7962; GFX900-NEXT: ;;#ASMEND 7963; GFX900-NEXT: s_lshr_b32 s8, s4, 16 7964; GFX900-NEXT: ;;#ASMSTART 7965; GFX900-NEXT: ; use s[8:9] 7966; GFX900-NEXT: ;;#ASMEND 7967; GFX900-NEXT: s_setpc_b64 s[30:31] 7968; 7969; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_u_u: 7970; GFX90A: ; %bb.0: 7971; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7972; GFX90A-NEXT: ;;#ASMSTART 7973; GFX90A-NEXT: ; def s[4:5] 7974; GFX90A-NEXT: ;;#ASMEND 7975; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 7976; GFX90A-NEXT: ;;#ASMSTART 7977; GFX90A-NEXT: ; use s[8:9] 7978; GFX90A-NEXT: ;;#ASMEND 7979; GFX90A-NEXT: s_setpc_b64 s[30:31] 7980; 7981; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_u_u: 7982; GFX940: ; %bb.0: 7983; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7984; GFX940-NEXT: ;;#ASMSTART 7985; GFX940-NEXT: ; def s[0:1] 7986; GFX940-NEXT: ;;#ASMEND 7987; GFX940-NEXT: s_lshr_b32 s8, s0, 16 7988; GFX940-NEXT: ;;#ASMSTART 7989; GFX940-NEXT: ; use s[8:9] 7990; GFX940-NEXT: ;;#ASMEND 7991; GFX940-NEXT: s_setpc_b64 s[30:31] 7992 %vec0 = call <4 x half> asm "; def $0", "=s"() 7993 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 poison, i32 poison> 7994 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 7995 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 7996 ret void 7997} 7998 7999define void @s_shuffle_v3f16_v4f16__2_u_u() { 8000; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_u_u: 8001; GFX900: ; %bb.0: 8002; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8003; GFX900-NEXT: ;;#ASMSTART 8004; GFX900-NEXT: ; def s[4:5] 8005; GFX900-NEXT: ;;#ASMEND 8006; GFX900-NEXT: s_mov_b32 s8, s5 8007; GFX900-NEXT: ;;#ASMSTART 8008; GFX900-NEXT: ; use s[8:9] 8009; GFX900-NEXT: ;;#ASMEND 8010; GFX900-NEXT: s_setpc_b64 s[30:31] 8011; 8012; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_u_u: 8013; GFX90A: ; %bb.0: 8014; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8015; GFX90A-NEXT: ;;#ASMSTART 8016; GFX90A-NEXT: ; def s[4:5] 8017; GFX90A-NEXT: ;;#ASMEND 8018; GFX90A-NEXT: s_mov_b32 s8, s5 8019; GFX90A-NEXT: ;;#ASMSTART 8020; GFX90A-NEXT: ; use s[8:9] 8021; GFX90A-NEXT: ;;#ASMEND 8022; GFX90A-NEXT: s_setpc_b64 s[30:31] 8023; 8024; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_u_u: 8025; GFX940: ; %bb.0: 8026; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8027; GFX940-NEXT: ;;#ASMSTART 8028; GFX940-NEXT: ; def s[0:1] 8029; GFX940-NEXT: ;;#ASMEND 8030; GFX940-NEXT: s_mov_b32 s8, s1 8031; GFX940-NEXT: ;;#ASMSTART 8032; GFX940-NEXT: ; use s[8:9] 8033; GFX940-NEXT: ;;#ASMEND 8034; GFX940-NEXT: s_setpc_b64 s[30:31] 8035 %vec0 = call <4 x half> asm "; def $0", "=s"() 8036 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 poison, i32 poison> 8037 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8038 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8039 ret void 8040} 8041 8042define void @s_shuffle_v3f16_v4f16__3_u_u() { 8043; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_u_u: 8044; GFX900: ; %bb.0: 8045; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8046; GFX900-NEXT: ;;#ASMSTART 8047; GFX900-NEXT: ; def s[4:5] 8048; GFX900-NEXT: ;;#ASMEND 8049; GFX900-NEXT: s_lshr_b32 s8, s5, 16 8050; GFX900-NEXT: ;;#ASMSTART 8051; GFX900-NEXT: ; use s[8:9] 8052; GFX900-NEXT: ;;#ASMEND 8053; GFX900-NEXT: s_setpc_b64 s[30:31] 8054; 8055; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_u_u: 8056; GFX90A: ; %bb.0: 8057; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8058; GFX90A-NEXT: ;;#ASMSTART 8059; GFX90A-NEXT: ; def s[4:5] 8060; GFX90A-NEXT: ;;#ASMEND 8061; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 8062; GFX90A-NEXT: ;;#ASMSTART 8063; GFX90A-NEXT: ; use s[8:9] 8064; GFX90A-NEXT: ;;#ASMEND 8065; GFX90A-NEXT: s_setpc_b64 s[30:31] 8066; 8067; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_u_u: 8068; GFX940: ; %bb.0: 8069; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8070; GFX940-NEXT: ;;#ASMSTART 8071; GFX940-NEXT: ; def s[0:1] 8072; GFX940-NEXT: ;;#ASMEND 8073; GFX940-NEXT: s_lshr_b32 s8, s1, 16 8074; GFX940-NEXT: ;;#ASMSTART 8075; GFX940-NEXT: ; use s[8:9] 8076; GFX940-NEXT: ;;#ASMEND 8077; GFX940-NEXT: s_setpc_b64 s[30:31] 8078 %vec0 = call <4 x half> asm "; def $0", "=s"() 8079 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 poison, i32 poison> 8080 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8081 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8082 ret void 8083} 8084 8085define void @s_shuffle_v3f16_v4f16__4_u_u() { 8086; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_u_u: 8087; GFX9: ; %bb.0: 8088; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8089; GFX9-NEXT: ;;#ASMSTART 8090; GFX9-NEXT: ; use s[8:9] 8091; GFX9-NEXT: ;;#ASMEND 8092; GFX9-NEXT: s_setpc_b64 s[30:31] 8093 %vec0 = call <4 x half> asm "; def $0", "=s"() 8094 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 poison, i32 poison> 8095 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8096 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8097 ret void 8098} 8099 8100define void @s_shuffle_v3f16_v4f16__5_u_u() { 8101; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_u_u: 8102; GFX900: ; %bb.0: 8103; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8104; GFX900-NEXT: ;;#ASMSTART 8105; GFX900-NEXT: ; def s[4:5] 8106; GFX900-NEXT: ;;#ASMEND 8107; GFX900-NEXT: s_lshr_b32 s8, s4, 16 8108; GFX900-NEXT: ;;#ASMSTART 8109; GFX900-NEXT: ; use s[8:9] 8110; GFX900-NEXT: ;;#ASMEND 8111; GFX900-NEXT: s_setpc_b64 s[30:31] 8112; 8113; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_u_u: 8114; GFX90A: ; %bb.0: 8115; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8116; GFX90A-NEXT: ;;#ASMSTART 8117; GFX90A-NEXT: ; def s[4:5] 8118; GFX90A-NEXT: ;;#ASMEND 8119; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 8120; GFX90A-NEXT: ;;#ASMSTART 8121; GFX90A-NEXT: ; use s[8:9] 8122; GFX90A-NEXT: ;;#ASMEND 8123; GFX90A-NEXT: s_setpc_b64 s[30:31] 8124; 8125; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_u_u: 8126; GFX940: ; %bb.0: 8127; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8128; GFX940-NEXT: ;;#ASMSTART 8129; GFX940-NEXT: ; def s[0:1] 8130; GFX940-NEXT: ;;#ASMEND 8131; GFX940-NEXT: s_lshr_b32 s8, s0, 16 8132; GFX940-NEXT: ;;#ASMSTART 8133; GFX940-NEXT: ; use s[8:9] 8134; GFX940-NEXT: ;;#ASMEND 8135; GFX940-NEXT: s_setpc_b64 s[30:31] 8136 %vec0 = call <4 x half> asm "; def $0", "=s"() 8137 %vec1 = call <4 x half> asm "; def $0", "=s"() 8138 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 poison, i32 poison> 8139 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8140 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8141 ret void 8142} 8143 8144define void @s_shuffle_v3f16_v4f16__6_u_u() { 8145; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_u_u: 8146; GFX900: ; %bb.0: 8147; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8148; GFX900-NEXT: ;;#ASMSTART 8149; GFX900-NEXT: ; def s[4:5] 8150; GFX900-NEXT: ;;#ASMEND 8151; GFX900-NEXT: s_mov_b32 s8, s5 8152; GFX900-NEXT: ;;#ASMSTART 8153; GFX900-NEXT: ; use s[8:9] 8154; GFX900-NEXT: ;;#ASMEND 8155; GFX900-NEXT: s_setpc_b64 s[30:31] 8156; 8157; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_u_u: 8158; GFX90A: ; %bb.0: 8159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8160; GFX90A-NEXT: ;;#ASMSTART 8161; GFX90A-NEXT: ; def s[4:5] 8162; GFX90A-NEXT: ;;#ASMEND 8163; GFX90A-NEXT: s_mov_b32 s8, s5 8164; GFX90A-NEXT: ;;#ASMSTART 8165; GFX90A-NEXT: ; use s[8:9] 8166; GFX90A-NEXT: ;;#ASMEND 8167; GFX90A-NEXT: s_setpc_b64 s[30:31] 8168; 8169; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_u_u: 8170; GFX940: ; %bb.0: 8171; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8172; GFX940-NEXT: ;;#ASMSTART 8173; GFX940-NEXT: ; def s[0:1] 8174; GFX940-NEXT: ;;#ASMEND 8175; GFX940-NEXT: s_mov_b32 s8, s1 8176; GFX940-NEXT: ;;#ASMSTART 8177; GFX940-NEXT: ; use s[8:9] 8178; GFX940-NEXT: ;;#ASMEND 8179; GFX940-NEXT: s_setpc_b64 s[30:31] 8180 %vec0 = call <4 x half> asm "; def $0", "=s"() 8181 %vec1 = call <4 x half> asm "; def $0", "=s"() 8182 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 poison, i32 poison> 8183 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8184 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8185 ret void 8186} 8187 8188define void @s_shuffle_v3f16_v4f16__7_u_u() { 8189; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_u: 8190; GFX900: ; %bb.0: 8191; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8192; GFX900-NEXT: ;;#ASMSTART 8193; GFX900-NEXT: ; def s[4:5] 8194; GFX900-NEXT: ;;#ASMEND 8195; GFX900-NEXT: s_lshr_b32 s8, s5, 16 8196; GFX900-NEXT: ;;#ASMSTART 8197; GFX900-NEXT: ; use s[8:9] 8198; GFX900-NEXT: ;;#ASMEND 8199; GFX900-NEXT: s_setpc_b64 s[30:31] 8200; 8201; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_u: 8202; GFX90A: ; %bb.0: 8203; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8204; GFX90A-NEXT: ;;#ASMSTART 8205; GFX90A-NEXT: ; def s[4:5] 8206; GFX90A-NEXT: ;;#ASMEND 8207; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 8208; GFX90A-NEXT: ;;#ASMSTART 8209; GFX90A-NEXT: ; use s[8:9] 8210; GFX90A-NEXT: ;;#ASMEND 8211; GFX90A-NEXT: s_setpc_b64 s[30:31] 8212; 8213; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_u: 8214; GFX940: ; %bb.0: 8215; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8216; GFX940-NEXT: ;;#ASMSTART 8217; GFX940-NEXT: ; def s[0:1] 8218; GFX940-NEXT: ;;#ASMEND 8219; GFX940-NEXT: s_lshr_b32 s8, s1, 16 8220; GFX940-NEXT: ;;#ASMSTART 8221; GFX940-NEXT: ; use s[8:9] 8222; GFX940-NEXT: ;;#ASMEND 8223; GFX940-NEXT: s_setpc_b64 s[30:31] 8224 %vec0 = call <4 x half> asm "; def $0", "=s"() 8225 %vec1 = call <4 x half> asm "; def $0", "=s"() 8226 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 poison> 8227 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8228 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8229 ret void 8230} 8231 8232define void @s_shuffle_v3f16_v4f16__7_0_u() { 8233; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_u: 8234; GFX900: ; %bb.0: 8235; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8236; GFX900-NEXT: ;;#ASMSTART 8237; GFX900-NEXT: ; def s[4:5] 8238; GFX900-NEXT: ;;#ASMEND 8239; GFX900-NEXT: ;;#ASMSTART 8240; GFX900-NEXT: ; def s[6:7] 8241; GFX900-NEXT: ;;#ASMEND 8242; GFX900-NEXT: s_lshr_b32 s5, s7, 16 8243; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8244; GFX900-NEXT: ;;#ASMSTART 8245; GFX900-NEXT: ; use s[8:9] 8246; GFX900-NEXT: ;;#ASMEND 8247; GFX900-NEXT: s_setpc_b64 s[30:31] 8248; 8249; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_u: 8250; GFX90A: ; %bb.0: 8251; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8252; GFX90A-NEXT: ;;#ASMSTART 8253; GFX90A-NEXT: ; def s[4:5] 8254; GFX90A-NEXT: ;;#ASMEND 8255; GFX90A-NEXT: ;;#ASMSTART 8256; GFX90A-NEXT: ; def s[6:7] 8257; GFX90A-NEXT: ;;#ASMEND 8258; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 8259; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8260; GFX90A-NEXT: ;;#ASMSTART 8261; GFX90A-NEXT: ; use s[8:9] 8262; GFX90A-NEXT: ;;#ASMEND 8263; GFX90A-NEXT: s_setpc_b64 s[30:31] 8264; 8265; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_u: 8266; GFX940: ; %bb.0: 8267; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8268; GFX940-NEXT: ;;#ASMSTART 8269; GFX940-NEXT: ; def s[0:1] 8270; GFX940-NEXT: ;;#ASMEND 8271; GFX940-NEXT: ;;#ASMSTART 8272; GFX940-NEXT: ; def s[2:3] 8273; GFX940-NEXT: ;;#ASMEND 8274; GFX940-NEXT: s_lshr_b32 s1, s3, 16 8275; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8276; GFX940-NEXT: ;;#ASMSTART 8277; GFX940-NEXT: ; use s[8:9] 8278; GFX940-NEXT: ;;#ASMEND 8279; GFX940-NEXT: s_setpc_b64 s[30:31] 8280 %vec0 = call <4 x half> asm "; def $0", "=s"() 8281 %vec1 = call <4 x half> asm "; def $0", "=s"() 8282 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 poison> 8283 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8284 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8285 ret void 8286} 8287 8288define void @s_shuffle_v3f16_v4f16__7_1_u() { 8289; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_u: 8290; GFX900: ; %bb.0: 8291; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8292; GFX900-NEXT: ;;#ASMSTART 8293; GFX900-NEXT: ; def s[4:5] 8294; GFX900-NEXT: ;;#ASMEND 8295; GFX900-NEXT: ;;#ASMSTART 8296; GFX900-NEXT: ; def s[6:7] 8297; GFX900-NEXT: ;;#ASMEND 8298; GFX900-NEXT: s_lshr_b32 s4, s4, 16 8299; GFX900-NEXT: s_lshr_b32 s5, s7, 16 8300; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8301; GFX900-NEXT: ;;#ASMSTART 8302; GFX900-NEXT: ; use s[8:9] 8303; GFX900-NEXT: ;;#ASMEND 8304; GFX900-NEXT: s_setpc_b64 s[30:31] 8305; 8306; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_u: 8307; GFX90A: ; %bb.0: 8308; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8309; GFX90A-NEXT: ;;#ASMSTART 8310; GFX90A-NEXT: ; def s[4:5] 8311; GFX90A-NEXT: ;;#ASMEND 8312; GFX90A-NEXT: ;;#ASMSTART 8313; GFX90A-NEXT: ; def s[6:7] 8314; GFX90A-NEXT: ;;#ASMEND 8315; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 8316; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 8317; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8318; GFX90A-NEXT: ;;#ASMSTART 8319; GFX90A-NEXT: ; use s[8:9] 8320; GFX90A-NEXT: ;;#ASMEND 8321; GFX90A-NEXT: s_setpc_b64 s[30:31] 8322; 8323; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_u: 8324; GFX940: ; %bb.0: 8325; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8326; GFX940-NEXT: ;;#ASMSTART 8327; GFX940-NEXT: ; def s[0:1] 8328; GFX940-NEXT: ;;#ASMEND 8329; GFX940-NEXT: ;;#ASMSTART 8330; GFX940-NEXT: ; def s[2:3] 8331; GFX940-NEXT: ;;#ASMEND 8332; GFX940-NEXT: s_lshr_b32 s0, s0, 16 8333; GFX940-NEXT: s_lshr_b32 s1, s3, 16 8334; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8335; GFX940-NEXT: ;;#ASMSTART 8336; GFX940-NEXT: ; use s[8:9] 8337; GFX940-NEXT: ;;#ASMEND 8338; GFX940-NEXT: s_setpc_b64 s[30:31] 8339 %vec0 = call <4 x half> asm "; def $0", "=s"() 8340 %vec1 = call <4 x half> asm "; def $0", "=s"() 8341 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 poison> 8342 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8343 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8344 ret void 8345} 8346 8347define void @s_shuffle_v3f16_v4f16__7_2_u() { 8348; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_u: 8349; GFX900: ; %bb.0: 8350; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8351; GFX900-NEXT: ;;#ASMSTART 8352; GFX900-NEXT: ; def s[4:5] 8353; GFX900-NEXT: ;;#ASMEND 8354; GFX900-NEXT: ;;#ASMSTART 8355; GFX900-NEXT: ; def s[6:7] 8356; GFX900-NEXT: ;;#ASMEND 8357; GFX900-NEXT: s_lshr_b32 s4, s7, 16 8358; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8359; GFX900-NEXT: ;;#ASMSTART 8360; GFX900-NEXT: ; use s[8:9] 8361; GFX900-NEXT: ;;#ASMEND 8362; GFX900-NEXT: s_setpc_b64 s[30:31] 8363; 8364; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_u: 8365; GFX90A: ; %bb.0: 8366; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8367; GFX90A-NEXT: ;;#ASMSTART 8368; GFX90A-NEXT: ; def s[4:5] 8369; GFX90A-NEXT: ;;#ASMEND 8370; GFX90A-NEXT: ;;#ASMSTART 8371; GFX90A-NEXT: ; def s[6:7] 8372; GFX90A-NEXT: ;;#ASMEND 8373; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 8374; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8375; GFX90A-NEXT: ;;#ASMSTART 8376; GFX90A-NEXT: ; use s[8:9] 8377; GFX90A-NEXT: ;;#ASMEND 8378; GFX90A-NEXT: s_setpc_b64 s[30:31] 8379; 8380; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_u: 8381; GFX940: ; %bb.0: 8382; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8383; GFX940-NEXT: ;;#ASMSTART 8384; GFX940-NEXT: ; def s[0:1] 8385; GFX940-NEXT: ;;#ASMEND 8386; GFX940-NEXT: ;;#ASMSTART 8387; GFX940-NEXT: ; def s[2:3] 8388; GFX940-NEXT: ;;#ASMEND 8389; GFX940-NEXT: s_lshr_b32 s0, s3, 16 8390; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 8391; GFX940-NEXT: ;;#ASMSTART 8392; GFX940-NEXT: ; use s[8:9] 8393; GFX940-NEXT: ;;#ASMEND 8394; GFX940-NEXT: s_setpc_b64 s[30:31] 8395 %vec0 = call <4 x half> asm "; def $0", "=s"() 8396 %vec1 = call <4 x half> asm "; def $0", "=s"() 8397 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 poison> 8398 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8399 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8400 ret void 8401} 8402 8403define void @s_shuffle_v3f16_v4f16__7_3_u() { 8404; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_u: 8405; GFX900: ; %bb.0: 8406; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8407; GFX900-NEXT: ;;#ASMSTART 8408; GFX900-NEXT: ; def s[4:5] 8409; GFX900-NEXT: ;;#ASMEND 8410; GFX900-NEXT: ;;#ASMSTART 8411; GFX900-NEXT: ; def s[6:7] 8412; GFX900-NEXT: ;;#ASMEND 8413; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8414; GFX900-NEXT: s_lshr_b32 s5, s7, 16 8415; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8416; GFX900-NEXT: ;;#ASMSTART 8417; GFX900-NEXT: ; use s[8:9] 8418; GFX900-NEXT: ;;#ASMEND 8419; GFX900-NEXT: s_setpc_b64 s[30:31] 8420; 8421; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_u: 8422; GFX90A: ; %bb.0: 8423; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8424; GFX90A-NEXT: ;;#ASMSTART 8425; GFX90A-NEXT: ; def s[4:5] 8426; GFX90A-NEXT: ;;#ASMEND 8427; GFX90A-NEXT: ;;#ASMSTART 8428; GFX90A-NEXT: ; def s[6:7] 8429; GFX90A-NEXT: ;;#ASMEND 8430; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8431; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 8432; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8433; GFX90A-NEXT: ;;#ASMSTART 8434; GFX90A-NEXT: ; use s[8:9] 8435; GFX90A-NEXT: ;;#ASMEND 8436; GFX90A-NEXT: s_setpc_b64 s[30:31] 8437; 8438; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_u: 8439; GFX940: ; %bb.0: 8440; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8441; GFX940-NEXT: ;;#ASMSTART 8442; GFX940-NEXT: ; def s[0:1] 8443; GFX940-NEXT: ;;#ASMEND 8444; GFX940-NEXT: ;;#ASMSTART 8445; GFX940-NEXT: ; def s[2:3] 8446; GFX940-NEXT: ;;#ASMEND 8447; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8448; GFX940-NEXT: s_lshr_b32 s1, s3, 16 8449; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8450; GFX940-NEXT: ;;#ASMSTART 8451; GFX940-NEXT: ; use s[8:9] 8452; GFX940-NEXT: ;;#ASMEND 8453; GFX940-NEXT: s_setpc_b64 s[30:31] 8454 %vec0 = call <4 x half> asm "; def $0", "=s"() 8455 %vec1 = call <4 x half> asm "; def $0", "=s"() 8456 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 poison> 8457 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8458 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8459 ret void 8460} 8461 8462define void @s_shuffle_v3f16_v4f16__7_4_u() { 8463; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_u: 8464; GFX900: ; %bb.0: 8465; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8466; GFX900-NEXT: ;;#ASMSTART 8467; GFX900-NEXT: ; def s[4:5] 8468; GFX900-NEXT: ;;#ASMEND 8469; GFX900-NEXT: s_lshr_b32 s5, s5, 16 8470; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8471; GFX900-NEXT: ;;#ASMSTART 8472; GFX900-NEXT: ; use s[8:9] 8473; GFX900-NEXT: ;;#ASMEND 8474; GFX900-NEXT: s_setpc_b64 s[30:31] 8475; 8476; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_u: 8477; GFX90A: ; %bb.0: 8478; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8479; GFX90A-NEXT: ;;#ASMSTART 8480; GFX90A-NEXT: ; def s[4:5] 8481; GFX90A-NEXT: ;;#ASMEND 8482; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 8483; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8484; GFX90A-NEXT: ;;#ASMSTART 8485; GFX90A-NEXT: ; use s[8:9] 8486; GFX90A-NEXT: ;;#ASMEND 8487; GFX90A-NEXT: s_setpc_b64 s[30:31] 8488; 8489; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_u: 8490; GFX940: ; %bb.0: 8491; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8492; GFX940-NEXT: ;;#ASMSTART 8493; GFX940-NEXT: ; def s[0:1] 8494; GFX940-NEXT: ;;#ASMEND 8495; GFX940-NEXT: s_lshr_b32 s1, s1, 16 8496; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8497; GFX940-NEXT: ;;#ASMSTART 8498; GFX940-NEXT: ; use s[8:9] 8499; GFX940-NEXT: ;;#ASMEND 8500; GFX940-NEXT: s_setpc_b64 s[30:31] 8501 %vec0 = call <4 x half> asm "; def $0", "=s"() 8502 %vec1 = call <4 x half> asm "; def $0", "=s"() 8503 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 poison> 8504 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8505 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8506 ret void 8507} 8508 8509define void @s_shuffle_v3f16_v4f16__7_5_u() { 8510; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_u: 8511; GFX900: ; %bb.0: 8512; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8513; GFX900-NEXT: ;;#ASMSTART 8514; GFX900-NEXT: ; def s[4:5] 8515; GFX900-NEXT: ;;#ASMEND 8516; GFX900-NEXT: s_lshr_b32 s4, s4, 16 8517; GFX900-NEXT: s_lshr_b32 s5, s5, 16 8518; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8519; GFX900-NEXT: ;;#ASMSTART 8520; GFX900-NEXT: ; use s[8:9] 8521; GFX900-NEXT: ;;#ASMEND 8522; GFX900-NEXT: s_setpc_b64 s[30:31] 8523; 8524; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_u: 8525; GFX90A: ; %bb.0: 8526; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8527; GFX90A-NEXT: ;;#ASMSTART 8528; GFX90A-NEXT: ; def s[4:5] 8529; GFX90A-NEXT: ;;#ASMEND 8530; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 8531; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 8532; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 8533; GFX90A-NEXT: ;;#ASMSTART 8534; GFX90A-NEXT: ; use s[8:9] 8535; GFX90A-NEXT: ;;#ASMEND 8536; GFX90A-NEXT: s_setpc_b64 s[30:31] 8537; 8538; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_u: 8539; GFX940: ; %bb.0: 8540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8541; GFX940-NEXT: ;;#ASMSTART 8542; GFX940-NEXT: ; def s[0:1] 8543; GFX940-NEXT: ;;#ASMEND 8544; GFX940-NEXT: s_lshr_b32 s0, s0, 16 8545; GFX940-NEXT: s_lshr_b32 s1, s1, 16 8546; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 8547; GFX940-NEXT: ;;#ASMSTART 8548; GFX940-NEXT: ; use s[8:9] 8549; GFX940-NEXT: ;;#ASMEND 8550; GFX940-NEXT: s_setpc_b64 s[30:31] 8551 %vec0 = call <4 x half> asm "; def $0", "=s"() 8552 %vec1 = call <4 x half> asm "; def $0", "=s"() 8553 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 poison> 8554 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8555 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8556 ret void 8557} 8558 8559define void @s_shuffle_v3f16_v4f16__7_6_u() { 8560; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_u: 8561; GFX900: ; %bb.0: 8562; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8563; GFX900-NEXT: ;;#ASMSTART 8564; GFX900-NEXT: ; def s[4:5] 8565; GFX900-NEXT: ;;#ASMEND 8566; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8567; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8568; GFX900-NEXT: ;;#ASMSTART 8569; GFX900-NEXT: ; use s[8:9] 8570; GFX900-NEXT: ;;#ASMEND 8571; GFX900-NEXT: s_setpc_b64 s[30:31] 8572; 8573; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_u: 8574; GFX90A: ; %bb.0: 8575; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8576; GFX90A-NEXT: ;;#ASMSTART 8577; GFX90A-NEXT: ; def s[4:5] 8578; GFX90A-NEXT: ;;#ASMEND 8579; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8580; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 8581; GFX90A-NEXT: ;;#ASMSTART 8582; GFX90A-NEXT: ; use s[8:9] 8583; GFX90A-NEXT: ;;#ASMEND 8584; GFX90A-NEXT: s_setpc_b64 s[30:31] 8585; 8586; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_u: 8587; GFX940: ; %bb.0: 8588; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8589; GFX940-NEXT: ;;#ASMSTART 8590; GFX940-NEXT: ; def s[0:1] 8591; GFX940-NEXT: ;;#ASMEND 8592; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8593; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 8594; GFX940-NEXT: ;;#ASMSTART 8595; GFX940-NEXT: ; use s[8:9] 8596; GFX940-NEXT: ;;#ASMEND 8597; GFX940-NEXT: s_setpc_b64 s[30:31] 8598 %vec0 = call <4 x half> asm "; def $0", "=s"() 8599 %vec1 = call <4 x half> asm "; def $0", "=s"() 8600 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 poison> 8601 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8602 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8603 ret void 8604} 8605 8606define void @s_shuffle_v3f16_v4f16__7_7_u() { 8607; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_u: 8608; GFX900: ; %bb.0: 8609; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8610; GFX900-NEXT: ;;#ASMSTART 8611; GFX900-NEXT: ; def s[4:5] 8612; GFX900-NEXT: ;;#ASMEND 8613; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8614; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8615; GFX900-NEXT: ;;#ASMSTART 8616; GFX900-NEXT: ; use s[8:9] 8617; GFX900-NEXT: ;;#ASMEND 8618; GFX900-NEXT: s_setpc_b64 s[30:31] 8619; 8620; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_u: 8621; GFX90A: ; %bb.0: 8622; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8623; GFX90A-NEXT: ;;#ASMSTART 8624; GFX90A-NEXT: ; def s[4:5] 8625; GFX90A-NEXT: ;;#ASMEND 8626; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8627; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8628; GFX90A-NEXT: ;;#ASMSTART 8629; GFX90A-NEXT: ; use s[8:9] 8630; GFX90A-NEXT: ;;#ASMEND 8631; GFX90A-NEXT: s_setpc_b64 s[30:31] 8632; 8633; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_u: 8634; GFX940: ; %bb.0: 8635; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8636; GFX940-NEXT: ;;#ASMSTART 8637; GFX940-NEXT: ; def s[0:1] 8638; GFX940-NEXT: ;;#ASMEND 8639; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8640; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8641; GFX940-NEXT: ;;#ASMSTART 8642; GFX940-NEXT: ; use s[8:9] 8643; GFX940-NEXT: ;;#ASMEND 8644; GFX940-NEXT: s_setpc_b64 s[30:31] 8645 %vec0 = call <4 x half> asm "; def $0", "=s"() 8646 %vec1 = call <4 x half> asm "; def $0", "=s"() 8647 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 poison> 8648 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8649 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8650 ret void 8651} 8652 8653define void @s_shuffle_v3f16_v4f16__7_7_0() { 8654; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_0: 8655; GFX900: ; %bb.0: 8656; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8657; GFX900-NEXT: ;;#ASMSTART 8658; GFX900-NEXT: ; def s[4:5] 8659; GFX900-NEXT: ;;#ASMEND 8660; GFX900-NEXT: ;;#ASMSTART 8661; GFX900-NEXT: ; def s[6:7] 8662; GFX900-NEXT: ;;#ASMEND 8663; GFX900-NEXT: s_lshr_b32 s5, s7, 16 8664; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8665; GFX900-NEXT: s_mov_b32 s9, s4 8666; GFX900-NEXT: ;;#ASMSTART 8667; GFX900-NEXT: ; use s[8:9] 8668; GFX900-NEXT: ;;#ASMEND 8669; GFX900-NEXT: s_setpc_b64 s[30:31] 8670; 8671; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_0: 8672; GFX90A: ; %bb.0: 8673; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8674; GFX90A-NEXT: ;;#ASMSTART 8675; GFX90A-NEXT: ; def s[4:5] 8676; GFX90A-NEXT: ;;#ASMEND 8677; GFX90A-NEXT: ;;#ASMSTART 8678; GFX90A-NEXT: ; def s[6:7] 8679; GFX90A-NEXT: ;;#ASMEND 8680; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 8681; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8682; GFX90A-NEXT: s_mov_b32 s9, s4 8683; GFX90A-NEXT: ;;#ASMSTART 8684; GFX90A-NEXT: ; use s[8:9] 8685; GFX90A-NEXT: ;;#ASMEND 8686; GFX90A-NEXT: s_setpc_b64 s[30:31] 8687; 8688; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_0: 8689; GFX940: ; %bb.0: 8690; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8691; GFX940-NEXT: ;;#ASMSTART 8692; GFX940-NEXT: ; def s[0:1] 8693; GFX940-NEXT: ;;#ASMEND 8694; GFX940-NEXT: ;;#ASMSTART 8695; GFX940-NEXT: ; def s[2:3] 8696; GFX940-NEXT: ;;#ASMEND 8697; GFX940-NEXT: s_lshr_b32 s1, s3, 16 8698; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8699; GFX940-NEXT: s_mov_b32 s9, s0 8700; GFX940-NEXT: ;;#ASMSTART 8701; GFX940-NEXT: ; use s[8:9] 8702; GFX940-NEXT: ;;#ASMEND 8703; GFX940-NEXT: s_setpc_b64 s[30:31] 8704 %vec0 = call <4 x half> asm "; def $0", "=s"() 8705 %vec1 = call <4 x half> asm "; def $0", "=s"() 8706 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 0> 8707 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8708 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8709 ret void 8710} 8711 8712define void @s_shuffle_v3f16_v4f16__7_7_1() { 8713; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_1: 8714; GFX900: ; %bb.0: 8715; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8716; GFX900-NEXT: ;;#ASMSTART 8717; GFX900-NEXT: ; def s[4:5] 8718; GFX900-NEXT: ;;#ASMEND 8719; GFX900-NEXT: ;;#ASMSTART 8720; GFX900-NEXT: ; def s[6:7] 8721; GFX900-NEXT: ;;#ASMEND 8722; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8723; GFX900-NEXT: s_lshr_b32 s4, s7, 16 8724; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8725; GFX900-NEXT: ;;#ASMSTART 8726; GFX900-NEXT: ; use s[8:9] 8727; GFX900-NEXT: ;;#ASMEND 8728; GFX900-NEXT: s_setpc_b64 s[30:31] 8729; 8730; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_1: 8731; GFX90A: ; %bb.0: 8732; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8733; GFX90A-NEXT: ;;#ASMSTART 8734; GFX90A-NEXT: ; def s[4:5] 8735; GFX90A-NEXT: ;;#ASMEND 8736; GFX90A-NEXT: ;;#ASMSTART 8737; GFX90A-NEXT: ; def s[6:7] 8738; GFX90A-NEXT: ;;#ASMEND 8739; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8740; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 8741; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8742; GFX90A-NEXT: ;;#ASMSTART 8743; GFX90A-NEXT: ; use s[8:9] 8744; GFX90A-NEXT: ;;#ASMEND 8745; GFX90A-NEXT: s_setpc_b64 s[30:31] 8746; 8747; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_1: 8748; GFX940: ; %bb.0: 8749; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8750; GFX940-NEXT: ;;#ASMSTART 8751; GFX940-NEXT: ; def s[0:1] 8752; GFX940-NEXT: ;;#ASMEND 8753; GFX940-NEXT: ;;#ASMSTART 8754; GFX940-NEXT: ; def s[2:3] 8755; GFX940-NEXT: ;;#ASMEND 8756; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8757; GFX940-NEXT: s_lshr_b32 s0, s3, 16 8758; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8759; GFX940-NEXT: ;;#ASMSTART 8760; GFX940-NEXT: ; use s[8:9] 8761; GFX940-NEXT: ;;#ASMEND 8762; GFX940-NEXT: s_setpc_b64 s[30:31] 8763 %vec0 = call <4 x half> asm "; def $0", "=s"() 8764 %vec1 = call <4 x half> asm "; def $0", "=s"() 8765 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 1> 8766 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8767 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8768 ret void 8769} 8770 8771define void @s_shuffle_v3f16_v4f16__7_7_2() { 8772; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_2: 8773; GFX900: ; %bb.0: 8774; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8775; GFX900-NEXT: ;;#ASMSTART 8776; GFX900-NEXT: ; def s[4:5] 8777; GFX900-NEXT: ;;#ASMEND 8778; GFX900-NEXT: ;;#ASMSTART 8779; GFX900-NEXT: ; def s[8:9] 8780; GFX900-NEXT: ;;#ASMEND 8781; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8782; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8783; GFX900-NEXT: ;;#ASMSTART 8784; GFX900-NEXT: ; use s[8:9] 8785; GFX900-NEXT: ;;#ASMEND 8786; GFX900-NEXT: s_setpc_b64 s[30:31] 8787; 8788; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_2: 8789; GFX90A: ; %bb.0: 8790; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8791; GFX90A-NEXT: ;;#ASMSTART 8792; GFX90A-NEXT: ; def s[4:5] 8793; GFX90A-NEXT: ;;#ASMEND 8794; GFX90A-NEXT: ;;#ASMSTART 8795; GFX90A-NEXT: ; def s[8:9] 8796; GFX90A-NEXT: ;;#ASMEND 8797; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8798; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8799; GFX90A-NEXT: ;;#ASMSTART 8800; GFX90A-NEXT: ; use s[8:9] 8801; GFX90A-NEXT: ;;#ASMEND 8802; GFX90A-NEXT: s_setpc_b64 s[30:31] 8803; 8804; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_2: 8805; GFX940: ; %bb.0: 8806; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8807; GFX940-NEXT: ;;#ASMSTART 8808; GFX940-NEXT: ; def s[0:1] 8809; GFX940-NEXT: ;;#ASMEND 8810; GFX940-NEXT: ;;#ASMSTART 8811; GFX940-NEXT: ; def s[8:9] 8812; GFX940-NEXT: ;;#ASMEND 8813; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8814; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8815; GFX940-NEXT: ;;#ASMSTART 8816; GFX940-NEXT: ; use s[8:9] 8817; GFX940-NEXT: ;;#ASMEND 8818; GFX940-NEXT: s_setpc_b64 s[30:31] 8819 %vec0 = call <4 x half> asm "; def $0", "=s"() 8820 %vec1 = call <4 x half> asm "; def $0", "=s"() 8821 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 2> 8822 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8823 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8824 ret void 8825} 8826 8827define void @s_shuffle_v3f16_v4f16__7_7_3() { 8828; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_3: 8829; GFX900: ; %bb.0: 8830; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8831; GFX900-NEXT: ;;#ASMSTART 8832; GFX900-NEXT: ; def s[4:5] 8833; GFX900-NEXT: ;;#ASMEND 8834; GFX900-NEXT: ;;#ASMSTART 8835; GFX900-NEXT: ; def s[6:7] 8836; GFX900-NEXT: ;;#ASMEND 8837; GFX900-NEXT: s_lshr_b32 s4, s7, 16 8838; GFX900-NEXT: s_lshr_b32 s9, s5, 16 8839; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8840; GFX900-NEXT: ;;#ASMSTART 8841; GFX900-NEXT: ; use s[8:9] 8842; GFX900-NEXT: ;;#ASMEND 8843; GFX900-NEXT: s_setpc_b64 s[30:31] 8844; 8845; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_3: 8846; GFX90A: ; %bb.0: 8847; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8848; GFX90A-NEXT: ;;#ASMSTART 8849; GFX90A-NEXT: ; def s[4:5] 8850; GFX90A-NEXT: ;;#ASMEND 8851; GFX90A-NEXT: ;;#ASMSTART 8852; GFX90A-NEXT: ; def s[6:7] 8853; GFX90A-NEXT: ;;#ASMEND 8854; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 8855; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 8856; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8857; GFX90A-NEXT: ;;#ASMSTART 8858; GFX90A-NEXT: ; use s[8:9] 8859; GFX90A-NEXT: ;;#ASMEND 8860; GFX90A-NEXT: s_setpc_b64 s[30:31] 8861; 8862; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_3: 8863; GFX940: ; %bb.0: 8864; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8865; GFX940-NEXT: ;;#ASMSTART 8866; GFX940-NEXT: ; def s[0:1] 8867; GFX940-NEXT: ;;#ASMEND 8868; GFX940-NEXT: ;;#ASMSTART 8869; GFX940-NEXT: ; def s[2:3] 8870; GFX940-NEXT: ;;#ASMEND 8871; GFX940-NEXT: s_lshr_b32 s0, s3, 16 8872; GFX940-NEXT: s_lshr_b32 s9, s1, 16 8873; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8874; GFX940-NEXT: ;;#ASMSTART 8875; GFX940-NEXT: ; use s[8:9] 8876; GFX940-NEXT: ;;#ASMEND 8877; GFX940-NEXT: s_setpc_b64 s[30:31] 8878 %vec0 = call <4 x half> asm "; def $0", "=s"() 8879 %vec1 = call <4 x half> asm "; def $0", "=s"() 8880 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 3> 8881 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8882 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8883 ret void 8884} 8885 8886define void @s_shuffle_v3f16_v4f16__7_7_4() { 8887; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_4: 8888; GFX900: ; %bb.0: 8889; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8890; GFX900-NEXT: ;;#ASMSTART 8891; GFX900-NEXT: ; def s[4:5] 8892; GFX900-NEXT: ;;#ASMEND 8893; GFX900-NEXT: s_lshr_b32 s5, s5, 16 8894; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8895; GFX900-NEXT: s_mov_b32 s9, s4 8896; GFX900-NEXT: ;;#ASMSTART 8897; GFX900-NEXT: ; use s[8:9] 8898; GFX900-NEXT: ;;#ASMEND 8899; GFX900-NEXT: s_setpc_b64 s[30:31] 8900; 8901; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_4: 8902; GFX90A: ; %bb.0: 8903; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8904; GFX90A-NEXT: ;;#ASMSTART 8905; GFX90A-NEXT: ; def s[4:5] 8906; GFX90A-NEXT: ;;#ASMEND 8907; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 8908; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 8909; GFX90A-NEXT: s_mov_b32 s9, s4 8910; GFX90A-NEXT: ;;#ASMSTART 8911; GFX90A-NEXT: ; use s[8:9] 8912; GFX90A-NEXT: ;;#ASMEND 8913; GFX90A-NEXT: s_setpc_b64 s[30:31] 8914; 8915; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_4: 8916; GFX940: ; %bb.0: 8917; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8918; GFX940-NEXT: ;;#ASMSTART 8919; GFX940-NEXT: ; def s[0:1] 8920; GFX940-NEXT: ;;#ASMEND 8921; GFX940-NEXT: s_lshr_b32 s1, s1, 16 8922; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 8923; GFX940-NEXT: s_mov_b32 s9, s0 8924; GFX940-NEXT: ;;#ASMSTART 8925; GFX940-NEXT: ; use s[8:9] 8926; GFX940-NEXT: ;;#ASMEND 8927; GFX940-NEXT: s_setpc_b64 s[30:31] 8928 %vec0 = call <4 x half> asm "; def $0", "=s"() 8929 %vec1 = call <4 x half> asm "; def $0", "=s"() 8930 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 4> 8931 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8932 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8933 ret void 8934} 8935 8936define void @s_shuffle_v3f16_v4f16__7_7_5() { 8937; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_5: 8938; GFX900: ; %bb.0: 8939; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8940; GFX900-NEXT: ;;#ASMSTART 8941; GFX900-NEXT: ; def s[4:5] 8942; GFX900-NEXT: ;;#ASMEND 8943; GFX900-NEXT: s_lshr_b32 s9, s4, 16 8944; GFX900-NEXT: s_lshr_b32 s4, s5, 16 8945; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8946; GFX900-NEXT: ;;#ASMSTART 8947; GFX900-NEXT: ; use s[8:9] 8948; GFX900-NEXT: ;;#ASMEND 8949; GFX900-NEXT: s_setpc_b64 s[30:31] 8950; 8951; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_5: 8952; GFX90A: ; %bb.0: 8953; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8954; GFX90A-NEXT: ;;#ASMSTART 8955; GFX90A-NEXT: ; def s[4:5] 8956; GFX90A-NEXT: ;;#ASMEND 8957; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 8958; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 8959; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8960; GFX90A-NEXT: ;;#ASMSTART 8961; GFX90A-NEXT: ; use s[8:9] 8962; GFX90A-NEXT: ;;#ASMEND 8963; GFX90A-NEXT: s_setpc_b64 s[30:31] 8964; 8965; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_5: 8966; GFX940: ; %bb.0: 8967; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8968; GFX940-NEXT: ;;#ASMSTART 8969; GFX940-NEXT: ; def s[0:1] 8970; GFX940-NEXT: ;;#ASMEND 8971; GFX940-NEXT: s_lshr_b32 s9, s0, 16 8972; GFX940-NEXT: s_lshr_b32 s0, s1, 16 8973; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 8974; GFX940-NEXT: ;;#ASMSTART 8975; GFX940-NEXT: ; use s[8:9] 8976; GFX940-NEXT: ;;#ASMEND 8977; GFX940-NEXT: s_setpc_b64 s[30:31] 8978 %vec0 = call <4 x half> asm "; def $0", "=s"() 8979 %vec1 = call <4 x half> asm "; def $0", "=s"() 8980 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 5> 8981 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 8982 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 8983 ret void 8984} 8985 8986define void @s_shuffle_v3f16_v4f16__7_7_6() { 8987; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_6: 8988; GFX900: ; %bb.0: 8989; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 8990; GFX900-NEXT: ;;#ASMSTART 8991; GFX900-NEXT: ; def s[8:9] 8992; GFX900-NEXT: ;;#ASMEND 8993; GFX900-NEXT: s_lshr_b32 s4, s9, 16 8994; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 8995; GFX900-NEXT: ;;#ASMSTART 8996; GFX900-NEXT: ; use s[8:9] 8997; GFX900-NEXT: ;;#ASMEND 8998; GFX900-NEXT: s_setpc_b64 s[30:31] 8999; 9000; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_6: 9001; GFX90A: ; %bb.0: 9002; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9003; GFX90A-NEXT: ;;#ASMSTART 9004; GFX90A-NEXT: ; def s[8:9] 9005; GFX90A-NEXT: ;;#ASMEND 9006; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 9007; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 9008; GFX90A-NEXT: ;;#ASMSTART 9009; GFX90A-NEXT: ; use s[8:9] 9010; GFX90A-NEXT: ;;#ASMEND 9011; GFX90A-NEXT: s_setpc_b64 s[30:31] 9012; 9013; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_6: 9014; GFX940: ; %bb.0: 9015; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9016; GFX940-NEXT: ;;#ASMSTART 9017; GFX940-NEXT: ; def s[8:9] 9018; GFX940-NEXT: ;;#ASMEND 9019; GFX940-NEXT: s_lshr_b32 s0, s9, 16 9020; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 9021; GFX940-NEXT: ;;#ASMSTART 9022; GFX940-NEXT: ; use s[8:9] 9023; GFX940-NEXT: ;;#ASMEND 9024; GFX940-NEXT: s_setpc_b64 s[30:31] 9025 %vec0 = call <4 x half> asm "; def $0", "=s"() 9026 %vec1 = call <4 x half> asm "; def $0", "=s"() 9027 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 6> 9028 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9029 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9030 ret void 9031} 9032 9033define void @s_shuffle_v3f16_v4f16__7_7_7() { 9034; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_7_7: 9035; GFX900: ; %bb.0: 9036; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9037; GFX900-NEXT: ;;#ASMSTART 9038; GFX900-NEXT: ; def s[4:5] 9039; GFX900-NEXT: ;;#ASMEND 9040; GFX900-NEXT: s_lshr_b32 s9, s5, 16 9041; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9 9042; GFX900-NEXT: ;;#ASMSTART 9043; GFX900-NEXT: ; use s[8:9] 9044; GFX900-NEXT: ;;#ASMEND 9045; GFX900-NEXT: s_setpc_b64 s[30:31] 9046; 9047; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_7_7: 9048; GFX90A: ; %bb.0: 9049; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9050; GFX90A-NEXT: ;;#ASMSTART 9051; GFX90A-NEXT: ; def s[4:5] 9052; GFX90A-NEXT: ;;#ASMEND 9053; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 9054; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9 9055; GFX90A-NEXT: ;;#ASMSTART 9056; GFX90A-NEXT: ; use s[8:9] 9057; GFX90A-NEXT: ;;#ASMEND 9058; GFX90A-NEXT: s_setpc_b64 s[30:31] 9059; 9060; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_7_7: 9061; GFX940: ; %bb.0: 9062; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9063; GFX940-NEXT: ;;#ASMSTART 9064; GFX940-NEXT: ; def s[0:1] 9065; GFX940-NEXT: ;;#ASMEND 9066; GFX940-NEXT: s_lshr_b32 s9, s1, 16 9067; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 9068; GFX940-NEXT: ;;#ASMSTART 9069; GFX940-NEXT: ; use s[8:9] 9070; GFX940-NEXT: ;;#ASMEND 9071; GFX940-NEXT: s_setpc_b64 s[30:31] 9072 %vec0 = call <4 x half> asm "; def $0", "=s"() 9073 %vec1 = call <4 x half> asm "; def $0", "=s"() 9074 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 7, i32 7> 9075 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9076 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9077 ret void 9078} 9079 9080define void @s_shuffle_v3f16_v4f16__u_0_0() { 9081; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_0_0: 9082; GFX900: ; %bb.0: 9083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9084; GFX900-NEXT: ;;#ASMSTART 9085; GFX900-NEXT: ; def s[4:5] 9086; GFX900-NEXT: ;;#ASMEND 9087; GFX900-NEXT: s_lshl_b32 s8, s4, 16 9088; GFX900-NEXT: s_mov_b32 s9, s4 9089; GFX900-NEXT: ;;#ASMSTART 9090; GFX900-NEXT: ; use s[8:9] 9091; GFX900-NEXT: ;;#ASMEND 9092; GFX900-NEXT: s_setpc_b64 s[30:31] 9093; 9094; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_0_0: 9095; GFX90A: ; %bb.0: 9096; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9097; GFX90A-NEXT: ;;#ASMSTART 9098; GFX90A-NEXT: ; def s[4:5] 9099; GFX90A-NEXT: ;;#ASMEND 9100; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 9101; GFX90A-NEXT: s_mov_b32 s9, s4 9102; GFX90A-NEXT: ;;#ASMSTART 9103; GFX90A-NEXT: ; use s[8:9] 9104; GFX90A-NEXT: ;;#ASMEND 9105; GFX90A-NEXT: s_setpc_b64 s[30:31] 9106; 9107; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_0_0: 9108; GFX940: ; %bb.0: 9109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9110; GFX940-NEXT: ;;#ASMSTART 9111; GFX940-NEXT: ; def s[0:1] 9112; GFX940-NEXT: ;;#ASMEND 9113; GFX940-NEXT: s_lshl_b32 s8, s0, 16 9114; GFX940-NEXT: s_mov_b32 s9, s0 9115; GFX940-NEXT: ;;#ASMSTART 9116; GFX940-NEXT: ; use s[8:9] 9117; GFX940-NEXT: ;;#ASMEND 9118; GFX940-NEXT: s_setpc_b64 s[30:31] 9119 %vec0 = call <4 x half> asm "; def $0", "=s"() 9120 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 0, i32 0> 9121 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9122 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9123 ret void 9124} 9125 9126define void @s_shuffle_v3f16_v4f16__0_0_0() { 9127; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_0_0: 9128; GFX900: ; %bb.0: 9129; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9130; GFX900-NEXT: ;;#ASMSTART 9131; GFX900-NEXT: ; def s[4:5] 9132; GFX900-NEXT: ;;#ASMEND 9133; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 9134; GFX900-NEXT: s_mov_b32 s9, s4 9135; GFX900-NEXT: ;;#ASMSTART 9136; GFX900-NEXT: ; use s[8:9] 9137; GFX900-NEXT: ;;#ASMEND 9138; GFX900-NEXT: s_setpc_b64 s[30:31] 9139; 9140; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_0_0: 9141; GFX90A: ; %bb.0: 9142; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9143; GFX90A-NEXT: ;;#ASMSTART 9144; GFX90A-NEXT: ; def s[4:5] 9145; GFX90A-NEXT: ;;#ASMEND 9146; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 9147; GFX90A-NEXT: s_mov_b32 s9, s4 9148; GFX90A-NEXT: ;;#ASMSTART 9149; GFX90A-NEXT: ; use s[8:9] 9150; GFX90A-NEXT: ;;#ASMEND 9151; GFX90A-NEXT: s_setpc_b64 s[30:31] 9152; 9153; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_0_0: 9154; GFX940: ; %bb.0: 9155; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9156; GFX940-NEXT: ;;#ASMSTART 9157; GFX940-NEXT: ; def s[0:1] 9158; GFX940-NEXT: ;;#ASMEND 9159; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 9160; GFX940-NEXT: s_mov_b32 s9, s0 9161; GFX940-NEXT: ;;#ASMSTART 9162; GFX940-NEXT: ; use s[8:9] 9163; GFX940-NEXT: ;;#ASMEND 9164; GFX940-NEXT: s_setpc_b64 s[30:31] 9165 %vec0 = call <4 x half> asm "; def $0", "=s"() 9166 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> zeroinitializer 9167 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9168 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9169 ret void 9170} 9171 9172define void @s_shuffle_v3f16_v4f16__1_0_0() { 9173; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_0_0: 9174; GFX900: ; %bb.0: 9175; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9176; GFX900-NEXT: ;;#ASMSTART 9177; GFX900-NEXT: ; def s[4:5] 9178; GFX900-NEXT: ;;#ASMEND 9179; GFX900-NEXT: s_lshr_b32 s5, s4, 16 9180; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9181; GFX900-NEXT: s_mov_b32 s9, s4 9182; GFX900-NEXT: ;;#ASMSTART 9183; GFX900-NEXT: ; use s[8:9] 9184; GFX900-NEXT: ;;#ASMEND 9185; GFX900-NEXT: s_setpc_b64 s[30:31] 9186; 9187; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_0_0: 9188; GFX90A: ; %bb.0: 9189; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9190; GFX90A-NEXT: ;;#ASMSTART 9191; GFX90A-NEXT: ; def s[4:5] 9192; GFX90A-NEXT: ;;#ASMEND 9193; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 9194; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9195; GFX90A-NEXT: s_mov_b32 s9, s4 9196; GFX90A-NEXT: ;;#ASMSTART 9197; GFX90A-NEXT: ; use s[8:9] 9198; GFX90A-NEXT: ;;#ASMEND 9199; GFX90A-NEXT: s_setpc_b64 s[30:31] 9200; 9201; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_0_0: 9202; GFX940: ; %bb.0: 9203; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9204; GFX940-NEXT: ;;#ASMSTART 9205; GFX940-NEXT: ; def s[0:1] 9206; GFX940-NEXT: ;;#ASMEND 9207; GFX940-NEXT: s_lshr_b32 s1, s0, 16 9208; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9209; GFX940-NEXT: s_mov_b32 s9, s0 9210; GFX940-NEXT: ;;#ASMSTART 9211; GFX940-NEXT: ; use s[8:9] 9212; GFX940-NEXT: ;;#ASMEND 9213; GFX940-NEXT: s_setpc_b64 s[30:31] 9214 %vec0 = call <4 x half> asm "; def $0", "=s"() 9215 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 0, i32 0> 9216 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9217 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9218 ret void 9219} 9220 9221define void @s_shuffle_v3f16_v4f16__2_0_0() { 9222; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_0_0: 9223; GFX900: ; %bb.0: 9224; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9225; GFX900-NEXT: ;;#ASMSTART 9226; GFX900-NEXT: ; def s[4:5] 9227; GFX900-NEXT: ;;#ASMEND 9228; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9229; GFX900-NEXT: s_mov_b32 s9, s4 9230; GFX900-NEXT: ;;#ASMSTART 9231; GFX900-NEXT: ; use s[8:9] 9232; GFX900-NEXT: ;;#ASMEND 9233; GFX900-NEXT: s_setpc_b64 s[30:31] 9234; 9235; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_0_0: 9236; GFX90A: ; %bb.0: 9237; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9238; GFX90A-NEXT: ;;#ASMSTART 9239; GFX90A-NEXT: ; def s[4:5] 9240; GFX90A-NEXT: ;;#ASMEND 9241; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9242; GFX90A-NEXT: s_mov_b32 s9, s4 9243; GFX90A-NEXT: ;;#ASMSTART 9244; GFX90A-NEXT: ; use s[8:9] 9245; GFX90A-NEXT: ;;#ASMEND 9246; GFX90A-NEXT: s_setpc_b64 s[30:31] 9247; 9248; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_0_0: 9249; GFX940: ; %bb.0: 9250; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9251; GFX940-NEXT: ;;#ASMSTART 9252; GFX940-NEXT: ; def s[0:1] 9253; GFX940-NEXT: ;;#ASMEND 9254; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9255; GFX940-NEXT: s_mov_b32 s9, s0 9256; GFX940-NEXT: ;;#ASMSTART 9257; GFX940-NEXT: ; use s[8:9] 9258; GFX940-NEXT: ;;#ASMEND 9259; GFX940-NEXT: s_setpc_b64 s[30:31] 9260 %vec0 = call <4 x half> asm "; def $0", "=s"() 9261 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 0, i32 0> 9262 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9263 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9264 ret void 9265} 9266 9267define void @s_shuffle_v3f16_v4f16__3_0_0() { 9268; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_0_0: 9269; GFX900: ; %bb.0: 9270; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9271; GFX900-NEXT: ;;#ASMSTART 9272; GFX900-NEXT: ; def s[4:5] 9273; GFX900-NEXT: ;;#ASMEND 9274; GFX900-NEXT: s_lshr_b32 s5, s5, 16 9275; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9276; GFX900-NEXT: s_mov_b32 s9, s4 9277; GFX900-NEXT: ;;#ASMSTART 9278; GFX900-NEXT: ; use s[8:9] 9279; GFX900-NEXT: ;;#ASMEND 9280; GFX900-NEXT: s_setpc_b64 s[30:31] 9281; 9282; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_0_0: 9283; GFX90A: ; %bb.0: 9284; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9285; GFX90A-NEXT: ;;#ASMSTART 9286; GFX90A-NEXT: ; def s[4:5] 9287; GFX90A-NEXT: ;;#ASMEND 9288; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 9289; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9290; GFX90A-NEXT: s_mov_b32 s9, s4 9291; GFX90A-NEXT: ;;#ASMSTART 9292; GFX90A-NEXT: ; use s[8:9] 9293; GFX90A-NEXT: ;;#ASMEND 9294; GFX90A-NEXT: s_setpc_b64 s[30:31] 9295; 9296; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_0_0: 9297; GFX940: ; %bb.0: 9298; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9299; GFX940-NEXT: ;;#ASMSTART 9300; GFX940-NEXT: ; def s[0:1] 9301; GFX940-NEXT: ;;#ASMEND 9302; GFX940-NEXT: s_lshr_b32 s1, s1, 16 9303; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9304; GFX940-NEXT: s_mov_b32 s9, s0 9305; GFX940-NEXT: ;;#ASMSTART 9306; GFX940-NEXT: ; use s[8:9] 9307; GFX940-NEXT: ;;#ASMEND 9308; GFX940-NEXT: s_setpc_b64 s[30:31] 9309 %vec0 = call <4 x half> asm "; def $0", "=s"() 9310 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 0, i32 0> 9311 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9312 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9313 ret void 9314} 9315 9316define void @s_shuffle_v3f16_v4f16__4_0_0() { 9317; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_0_0: 9318; GFX900: ; %bb.0: 9319; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9320; GFX900-NEXT: ;;#ASMSTART 9321; GFX900-NEXT: ; def s[4:5] 9322; GFX900-NEXT: ;;#ASMEND 9323; GFX900-NEXT: s_lshl_b32 s8, s4, 16 9324; GFX900-NEXT: s_mov_b32 s9, s4 9325; GFX900-NEXT: ;;#ASMSTART 9326; GFX900-NEXT: ; use s[8:9] 9327; GFX900-NEXT: ;;#ASMEND 9328; GFX900-NEXT: s_setpc_b64 s[30:31] 9329; 9330; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_0_0: 9331; GFX90A: ; %bb.0: 9332; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9333; GFX90A-NEXT: ;;#ASMSTART 9334; GFX90A-NEXT: ; def s[4:5] 9335; GFX90A-NEXT: ;;#ASMEND 9336; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 9337; GFX90A-NEXT: s_mov_b32 s9, s4 9338; GFX90A-NEXT: ;;#ASMSTART 9339; GFX90A-NEXT: ; use s[8:9] 9340; GFX90A-NEXT: ;;#ASMEND 9341; GFX90A-NEXT: s_setpc_b64 s[30:31] 9342; 9343; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_0_0: 9344; GFX940: ; %bb.0: 9345; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9346; GFX940-NEXT: ;;#ASMSTART 9347; GFX940-NEXT: ; def s[0:1] 9348; GFX940-NEXT: ;;#ASMEND 9349; GFX940-NEXT: s_lshl_b32 s8, s0, 16 9350; GFX940-NEXT: s_mov_b32 s9, s0 9351; GFX940-NEXT: ;;#ASMSTART 9352; GFX940-NEXT: ; use s[8:9] 9353; GFX940-NEXT: ;;#ASMEND 9354; GFX940-NEXT: s_setpc_b64 s[30:31] 9355 %vec0 = call <4 x half> asm "; def $0", "=s"() 9356 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 0, i32 0> 9357 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9358 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9359 ret void 9360} 9361 9362define void @s_shuffle_v3f16_v4f16__5_0_0() { 9363; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_0_0: 9364; GFX900: ; %bb.0: 9365; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9366; GFX900-NEXT: ;;#ASMSTART 9367; GFX900-NEXT: ; def s[4:5] 9368; GFX900-NEXT: ;;#ASMEND 9369; GFX900-NEXT: ;;#ASMSTART 9370; GFX900-NEXT: ; def s[6:7] 9371; GFX900-NEXT: ;;#ASMEND 9372; GFX900-NEXT: s_lshr_b32 s5, s6, 16 9373; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9374; GFX900-NEXT: s_mov_b32 s9, s4 9375; GFX900-NEXT: ;;#ASMSTART 9376; GFX900-NEXT: ; use s[8:9] 9377; GFX900-NEXT: ;;#ASMEND 9378; GFX900-NEXT: s_setpc_b64 s[30:31] 9379; 9380; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_0_0: 9381; GFX90A: ; %bb.0: 9382; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9383; GFX90A-NEXT: ;;#ASMSTART 9384; GFX90A-NEXT: ; def s[4:5] 9385; GFX90A-NEXT: ;;#ASMEND 9386; GFX90A-NEXT: ;;#ASMSTART 9387; GFX90A-NEXT: ; def s[6:7] 9388; GFX90A-NEXT: ;;#ASMEND 9389; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 9390; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9391; GFX90A-NEXT: s_mov_b32 s9, s4 9392; GFX90A-NEXT: ;;#ASMSTART 9393; GFX90A-NEXT: ; use s[8:9] 9394; GFX90A-NEXT: ;;#ASMEND 9395; GFX90A-NEXT: s_setpc_b64 s[30:31] 9396; 9397; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_0_0: 9398; GFX940: ; %bb.0: 9399; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9400; GFX940-NEXT: ;;#ASMSTART 9401; GFX940-NEXT: ; def s[0:1] 9402; GFX940-NEXT: ;;#ASMEND 9403; GFX940-NEXT: ;;#ASMSTART 9404; GFX940-NEXT: ; def s[2:3] 9405; GFX940-NEXT: ;;#ASMEND 9406; GFX940-NEXT: s_lshr_b32 s1, s2, 16 9407; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9408; GFX940-NEXT: s_mov_b32 s9, s0 9409; GFX940-NEXT: ;;#ASMSTART 9410; GFX940-NEXT: ; use s[8:9] 9411; GFX940-NEXT: ;;#ASMEND 9412; GFX940-NEXT: s_setpc_b64 s[30:31] 9413 %vec0 = call <4 x half> asm "; def $0", "=s"() 9414 %vec1 = call <4 x half> asm "; def $0", "=s"() 9415 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 0, i32 0> 9416 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9417 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9418 ret void 9419} 9420 9421define void @s_shuffle_v3f16_v4f16__6_0_0() { 9422; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_0_0: 9423; GFX900: ; %bb.0: 9424; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9425; GFX900-NEXT: ;;#ASMSTART 9426; GFX900-NEXT: ; def s[4:5] 9427; GFX900-NEXT: ;;#ASMEND 9428; GFX900-NEXT: ;;#ASMSTART 9429; GFX900-NEXT: ; def s[6:7] 9430; GFX900-NEXT: ;;#ASMEND 9431; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9432; GFX900-NEXT: s_mov_b32 s9, s4 9433; GFX900-NEXT: ;;#ASMSTART 9434; GFX900-NEXT: ; use s[8:9] 9435; GFX900-NEXT: ;;#ASMEND 9436; GFX900-NEXT: s_setpc_b64 s[30:31] 9437; 9438; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_0_0: 9439; GFX90A: ; %bb.0: 9440; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9441; GFX90A-NEXT: ;;#ASMSTART 9442; GFX90A-NEXT: ; def s[4:5] 9443; GFX90A-NEXT: ;;#ASMEND 9444; GFX90A-NEXT: ;;#ASMSTART 9445; GFX90A-NEXT: ; def s[6:7] 9446; GFX90A-NEXT: ;;#ASMEND 9447; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 9448; GFX90A-NEXT: s_mov_b32 s9, s4 9449; GFX90A-NEXT: ;;#ASMSTART 9450; GFX90A-NEXT: ; use s[8:9] 9451; GFX90A-NEXT: ;;#ASMEND 9452; GFX90A-NEXT: s_setpc_b64 s[30:31] 9453; 9454; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_0_0: 9455; GFX940: ; %bb.0: 9456; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9457; GFX940-NEXT: ;;#ASMSTART 9458; GFX940-NEXT: ; def s[0:1] 9459; GFX940-NEXT: ;;#ASMEND 9460; GFX940-NEXT: ;;#ASMSTART 9461; GFX940-NEXT: ; def s[2:3] 9462; GFX940-NEXT: ;;#ASMEND 9463; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 9464; GFX940-NEXT: s_mov_b32 s9, s0 9465; GFX940-NEXT: ;;#ASMSTART 9466; GFX940-NEXT: ; use s[8:9] 9467; GFX940-NEXT: ;;#ASMEND 9468; GFX940-NEXT: s_setpc_b64 s[30:31] 9469 %vec0 = call <4 x half> asm "; def $0", "=s"() 9470 %vec1 = call <4 x half> asm "; def $0", "=s"() 9471 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 0, i32 0> 9472 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9473 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9474 ret void 9475} 9476 9477define void @s_shuffle_v3f16_v4f16__7_0_0() { 9478; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_0: 9479; GFX900: ; %bb.0: 9480; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9481; GFX900-NEXT: ;;#ASMSTART 9482; GFX900-NEXT: ; def s[4:5] 9483; GFX900-NEXT: ;;#ASMEND 9484; GFX900-NEXT: ;;#ASMSTART 9485; GFX900-NEXT: ; def s[6:7] 9486; GFX900-NEXT: ;;#ASMEND 9487; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9488; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9489; GFX900-NEXT: s_mov_b32 s9, s4 9490; GFX900-NEXT: ;;#ASMSTART 9491; GFX900-NEXT: ; use s[8:9] 9492; GFX900-NEXT: ;;#ASMEND 9493; GFX900-NEXT: s_setpc_b64 s[30:31] 9494; 9495; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_0: 9496; GFX90A: ; %bb.0: 9497; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9498; GFX90A-NEXT: ;;#ASMSTART 9499; GFX90A-NEXT: ; def s[4:5] 9500; GFX90A-NEXT: ;;#ASMEND 9501; GFX90A-NEXT: ;;#ASMSTART 9502; GFX90A-NEXT: ; def s[6:7] 9503; GFX90A-NEXT: ;;#ASMEND 9504; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9505; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 9506; GFX90A-NEXT: s_mov_b32 s9, s4 9507; GFX90A-NEXT: ;;#ASMSTART 9508; GFX90A-NEXT: ; use s[8:9] 9509; GFX90A-NEXT: ;;#ASMEND 9510; GFX90A-NEXT: s_setpc_b64 s[30:31] 9511; 9512; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_0: 9513; GFX940: ; %bb.0: 9514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9515; GFX940-NEXT: ;;#ASMSTART 9516; GFX940-NEXT: ; def s[0:1] 9517; GFX940-NEXT: ;;#ASMEND 9518; GFX940-NEXT: ;;#ASMSTART 9519; GFX940-NEXT: ; def s[2:3] 9520; GFX940-NEXT: ;;#ASMEND 9521; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9522; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 9523; GFX940-NEXT: s_mov_b32 s9, s0 9524; GFX940-NEXT: ;;#ASMSTART 9525; GFX940-NEXT: ; use s[8:9] 9526; GFX940-NEXT: ;;#ASMEND 9527; GFX940-NEXT: s_setpc_b64 s[30:31] 9528 %vec0 = call <4 x half> asm "; def $0", "=s"() 9529 %vec1 = call <4 x half> asm "; def $0", "=s"() 9530 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 0> 9531 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9532 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9533 ret void 9534} 9535 9536define void @s_shuffle_v3f16_v4f16__7_u_0() { 9537; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_0: 9538; GFX900: ; %bb.0: 9539; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9540; GFX900-NEXT: ;;#ASMSTART 9541; GFX900-NEXT: ; def s[4:5] 9542; GFX900-NEXT: ;;#ASMEND 9543; GFX900-NEXT: ;;#ASMSTART 9544; GFX900-NEXT: ; def s[6:7] 9545; GFX900-NEXT: ;;#ASMEND 9546; GFX900-NEXT: s_lshr_b32 s8, s7, 16 9547; GFX900-NEXT: s_mov_b32 s9, s4 9548; GFX900-NEXT: ;;#ASMSTART 9549; GFX900-NEXT: ; use s[8:9] 9550; GFX900-NEXT: ;;#ASMEND 9551; GFX900-NEXT: s_setpc_b64 s[30:31] 9552; 9553; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_0: 9554; GFX90A: ; %bb.0: 9555; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9556; GFX90A-NEXT: ;;#ASMSTART 9557; GFX90A-NEXT: ; def s[4:5] 9558; GFX90A-NEXT: ;;#ASMEND 9559; GFX90A-NEXT: ;;#ASMSTART 9560; GFX90A-NEXT: ; def s[6:7] 9561; GFX90A-NEXT: ;;#ASMEND 9562; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 9563; GFX90A-NEXT: s_mov_b32 s9, s4 9564; GFX90A-NEXT: ;;#ASMSTART 9565; GFX90A-NEXT: ; use s[8:9] 9566; GFX90A-NEXT: ;;#ASMEND 9567; GFX90A-NEXT: s_setpc_b64 s[30:31] 9568; 9569; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_0: 9570; GFX940: ; %bb.0: 9571; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9572; GFX940-NEXT: ;;#ASMSTART 9573; GFX940-NEXT: ; def s[0:1] 9574; GFX940-NEXT: ;;#ASMEND 9575; GFX940-NEXT: ;;#ASMSTART 9576; GFX940-NEXT: ; def s[2:3] 9577; GFX940-NEXT: ;;#ASMEND 9578; GFX940-NEXT: s_lshr_b32 s8, s3, 16 9579; GFX940-NEXT: s_mov_b32 s9, s0 9580; GFX940-NEXT: ;;#ASMSTART 9581; GFX940-NEXT: ; use s[8:9] 9582; GFX940-NEXT: ;;#ASMEND 9583; GFX940-NEXT: s_setpc_b64 s[30:31] 9584 %vec0 = call <4 x half> asm "; def $0", "=s"() 9585 %vec1 = call <4 x half> asm "; def $0", "=s"() 9586 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 0> 9587 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9588 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9589 ret void 9590} 9591 9592define void @s_shuffle_v3f16_v4f16__7_1_0() { 9593; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_0: 9594; GFX900: ; %bb.0: 9595; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9596; GFX900-NEXT: ;;#ASMSTART 9597; GFX900-NEXT: ; def s[4:5] 9598; GFX900-NEXT: ;;#ASMEND 9599; GFX900-NEXT: ;;#ASMSTART 9600; GFX900-NEXT: ; def s[6:7] 9601; GFX900-NEXT: ;;#ASMEND 9602; GFX900-NEXT: s_lshr_b32 s5, s4, 16 9603; GFX900-NEXT: s_lshr_b32 s6, s7, 16 9604; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9605; GFX900-NEXT: s_mov_b32 s9, s4 9606; GFX900-NEXT: ;;#ASMSTART 9607; GFX900-NEXT: ; use s[8:9] 9608; GFX900-NEXT: ;;#ASMEND 9609; GFX900-NEXT: s_setpc_b64 s[30:31] 9610; 9611; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_0: 9612; GFX90A: ; %bb.0: 9613; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9614; GFX90A-NEXT: ;;#ASMSTART 9615; GFX90A-NEXT: ; def s[4:5] 9616; GFX90A-NEXT: ;;#ASMEND 9617; GFX90A-NEXT: ;;#ASMSTART 9618; GFX90A-NEXT: ; def s[6:7] 9619; GFX90A-NEXT: ;;#ASMEND 9620; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 9621; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 9622; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9623; GFX90A-NEXT: s_mov_b32 s9, s4 9624; GFX90A-NEXT: ;;#ASMSTART 9625; GFX90A-NEXT: ; use s[8:9] 9626; GFX90A-NEXT: ;;#ASMEND 9627; GFX90A-NEXT: s_setpc_b64 s[30:31] 9628; 9629; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_0: 9630; GFX940: ; %bb.0: 9631; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9632; GFX940-NEXT: ;;#ASMSTART 9633; GFX940-NEXT: ; def s[0:1] 9634; GFX940-NEXT: ;;#ASMEND 9635; GFX940-NEXT: ;;#ASMSTART 9636; GFX940-NEXT: ; def s[2:3] 9637; GFX940-NEXT: ;;#ASMEND 9638; GFX940-NEXT: s_lshr_b32 s1, s0, 16 9639; GFX940-NEXT: s_lshr_b32 s2, s3, 16 9640; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 9641; GFX940-NEXT: s_mov_b32 s9, s0 9642; GFX940-NEXT: ;;#ASMSTART 9643; GFX940-NEXT: ; use s[8:9] 9644; GFX940-NEXT: ;;#ASMEND 9645; GFX940-NEXT: s_setpc_b64 s[30:31] 9646 %vec0 = call <4 x half> asm "; def $0", "=s"() 9647 %vec1 = call <4 x half> asm "; def $0", "=s"() 9648 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 0> 9649 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9650 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9651 ret void 9652} 9653 9654define void @s_shuffle_v3f16_v4f16__7_2_0() { 9655; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_0: 9656; GFX900: ; %bb.0: 9657; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9658; GFX900-NEXT: ;;#ASMSTART 9659; GFX900-NEXT: ; def s[6:7] 9660; GFX900-NEXT: ;;#ASMEND 9661; GFX900-NEXT: s_lshr_b32 s6, s7, 16 9662; GFX900-NEXT: ;;#ASMSTART 9663; GFX900-NEXT: ; def s[4:5] 9664; GFX900-NEXT: ;;#ASMEND 9665; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9666; GFX900-NEXT: s_mov_b32 s9, s4 9667; GFX900-NEXT: ;;#ASMSTART 9668; GFX900-NEXT: ; use s[8:9] 9669; GFX900-NEXT: ;;#ASMEND 9670; GFX900-NEXT: s_setpc_b64 s[30:31] 9671; 9672; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_0: 9673; GFX90A: ; %bb.0: 9674; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9675; GFX90A-NEXT: ;;#ASMSTART 9676; GFX90A-NEXT: ; def s[6:7] 9677; GFX90A-NEXT: ;;#ASMEND 9678; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 9679; GFX90A-NEXT: ;;#ASMSTART 9680; GFX90A-NEXT: ; def s[4:5] 9681; GFX90A-NEXT: ;;#ASMEND 9682; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9683; GFX90A-NEXT: s_mov_b32 s9, s4 9684; GFX90A-NEXT: ;;#ASMSTART 9685; GFX90A-NEXT: ; use s[8:9] 9686; GFX90A-NEXT: ;;#ASMEND 9687; GFX90A-NEXT: s_setpc_b64 s[30:31] 9688; 9689; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_0: 9690; GFX940: ; %bb.0: 9691; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9692; GFX940-NEXT: ;;#ASMSTART 9693; GFX940-NEXT: ; def s[2:3] 9694; GFX940-NEXT: ;;#ASMEND 9695; GFX940-NEXT: s_lshr_b32 s2, s3, 16 9696; GFX940-NEXT: ;;#ASMSTART 9697; GFX940-NEXT: ; def s[0:1] 9698; GFX940-NEXT: ;;#ASMEND 9699; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 9700; GFX940-NEXT: s_mov_b32 s9, s0 9701; GFX940-NEXT: ;;#ASMSTART 9702; GFX940-NEXT: ; use s[8:9] 9703; GFX940-NEXT: ;;#ASMEND 9704; GFX940-NEXT: s_setpc_b64 s[30:31] 9705 %vec0 = call <4 x half> asm "; def $0", "=s"() 9706 %vec1 = call <4 x half> asm "; def $0", "=s"() 9707 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 0> 9708 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9709 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9710 ret void 9711} 9712 9713define void @s_shuffle_v3f16_v4f16__7_3_0() { 9714; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_0: 9715; GFX900: ; %bb.0: 9716; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9717; GFX900-NEXT: ;;#ASMSTART 9718; GFX900-NEXT: ; def s[4:5] 9719; GFX900-NEXT: ;;#ASMEND 9720; GFX900-NEXT: ;;#ASMSTART 9721; GFX900-NEXT: ; def s[6:7] 9722; GFX900-NEXT: ;;#ASMEND 9723; GFX900-NEXT: s_lshr_b32 s5, s5, 16 9724; GFX900-NEXT: s_lshr_b32 s6, s7, 16 9725; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9726; GFX900-NEXT: s_mov_b32 s9, s4 9727; GFX900-NEXT: ;;#ASMSTART 9728; GFX900-NEXT: ; use s[8:9] 9729; GFX900-NEXT: ;;#ASMEND 9730; GFX900-NEXT: s_setpc_b64 s[30:31] 9731; 9732; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_0: 9733; GFX90A: ; %bb.0: 9734; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9735; GFX90A-NEXT: ;;#ASMSTART 9736; GFX90A-NEXT: ; def s[4:5] 9737; GFX90A-NEXT: ;;#ASMEND 9738; GFX90A-NEXT: ;;#ASMSTART 9739; GFX90A-NEXT: ; def s[6:7] 9740; GFX90A-NEXT: ;;#ASMEND 9741; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 9742; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 9743; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9744; GFX90A-NEXT: s_mov_b32 s9, s4 9745; GFX90A-NEXT: ;;#ASMSTART 9746; GFX90A-NEXT: ; use s[8:9] 9747; GFX90A-NEXT: ;;#ASMEND 9748; GFX90A-NEXT: s_setpc_b64 s[30:31] 9749; 9750; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_0: 9751; GFX940: ; %bb.0: 9752; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9753; GFX940-NEXT: ;;#ASMSTART 9754; GFX940-NEXT: ; def s[0:1] 9755; GFX940-NEXT: ;;#ASMEND 9756; GFX940-NEXT: ;;#ASMSTART 9757; GFX940-NEXT: ; def s[2:3] 9758; GFX940-NEXT: ;;#ASMEND 9759; GFX940-NEXT: s_lshr_b32 s1, s1, 16 9760; GFX940-NEXT: s_lshr_b32 s2, s3, 16 9761; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 9762; GFX940-NEXT: s_mov_b32 s9, s0 9763; GFX940-NEXT: ;;#ASMSTART 9764; GFX940-NEXT: ; use s[8:9] 9765; GFX940-NEXT: ;;#ASMEND 9766; GFX940-NEXT: s_setpc_b64 s[30:31] 9767 %vec0 = call <4 x half> asm "; def $0", "=s"() 9768 %vec1 = call <4 x half> asm "; def $0", "=s"() 9769 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 0> 9770 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9771 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9772 ret void 9773} 9774 9775define void @s_shuffle_v3f16_v4f16__7_4_0() { 9776; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_0: 9777; GFX900: ; %bb.0: 9778; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9779; GFX900-NEXT: ;;#ASMSTART 9780; GFX900-NEXT: ; def s[4:5] 9781; GFX900-NEXT: ;;#ASMEND 9782; GFX900-NEXT: ;;#ASMSTART 9783; GFX900-NEXT: ; def s[6:7] 9784; GFX900-NEXT: ;;#ASMEND 9785; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9786; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 9787; GFX900-NEXT: s_mov_b32 s9, s4 9788; GFX900-NEXT: ;;#ASMSTART 9789; GFX900-NEXT: ; use s[8:9] 9790; GFX900-NEXT: ;;#ASMEND 9791; GFX900-NEXT: s_setpc_b64 s[30:31] 9792; 9793; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_0: 9794; GFX90A: ; %bb.0: 9795; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9796; GFX90A-NEXT: ;;#ASMSTART 9797; GFX90A-NEXT: ; def s[4:5] 9798; GFX90A-NEXT: ;;#ASMEND 9799; GFX90A-NEXT: ;;#ASMSTART 9800; GFX90A-NEXT: ; def s[6:7] 9801; GFX90A-NEXT: ;;#ASMEND 9802; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9803; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 9804; GFX90A-NEXT: s_mov_b32 s9, s4 9805; GFX90A-NEXT: ;;#ASMSTART 9806; GFX90A-NEXT: ; use s[8:9] 9807; GFX90A-NEXT: ;;#ASMEND 9808; GFX90A-NEXT: s_setpc_b64 s[30:31] 9809; 9810; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_0: 9811; GFX940: ; %bb.0: 9812; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9813; GFX940-NEXT: ;;#ASMSTART 9814; GFX940-NEXT: ; def s[0:1] 9815; GFX940-NEXT: ;;#ASMEND 9816; GFX940-NEXT: ;;#ASMSTART 9817; GFX940-NEXT: ; def s[2:3] 9818; GFX940-NEXT: ;;#ASMEND 9819; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9820; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 9821; GFX940-NEXT: s_mov_b32 s9, s0 9822; GFX940-NEXT: ;;#ASMSTART 9823; GFX940-NEXT: ; use s[8:9] 9824; GFX940-NEXT: ;;#ASMEND 9825; GFX940-NEXT: s_setpc_b64 s[30:31] 9826 %vec0 = call <4 x half> asm "; def $0", "=s"() 9827 %vec1 = call <4 x half> asm "; def $0", "=s"() 9828 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 0> 9829 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9830 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9831 ret void 9832} 9833 9834define void @s_shuffle_v3f16_v4f16__7_5_0() { 9835; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_0: 9836; GFX900: ; %bb.0: 9837; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9838; GFX900-NEXT: ;;#ASMSTART 9839; GFX900-NEXT: ; def s[4:5] 9840; GFX900-NEXT: ;;#ASMEND 9841; GFX900-NEXT: ;;#ASMSTART 9842; GFX900-NEXT: ; def s[6:7] 9843; GFX900-NEXT: ;;#ASMEND 9844; GFX900-NEXT: s_lshr_b32 s5, s6, 16 9845; GFX900-NEXT: s_lshr_b32 s6, s7, 16 9846; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9847; GFX900-NEXT: s_mov_b32 s9, s4 9848; GFX900-NEXT: ;;#ASMSTART 9849; GFX900-NEXT: ; use s[8:9] 9850; GFX900-NEXT: ;;#ASMEND 9851; GFX900-NEXT: s_setpc_b64 s[30:31] 9852; 9853; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_0: 9854; GFX90A: ; %bb.0: 9855; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9856; GFX90A-NEXT: ;;#ASMSTART 9857; GFX90A-NEXT: ; def s[4:5] 9858; GFX90A-NEXT: ;;#ASMEND 9859; GFX90A-NEXT: ;;#ASMSTART 9860; GFX90A-NEXT: ; def s[6:7] 9861; GFX90A-NEXT: ;;#ASMEND 9862; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 9863; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 9864; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 9865; GFX90A-NEXT: s_mov_b32 s9, s4 9866; GFX90A-NEXT: ;;#ASMSTART 9867; GFX90A-NEXT: ; use s[8:9] 9868; GFX90A-NEXT: ;;#ASMEND 9869; GFX90A-NEXT: s_setpc_b64 s[30:31] 9870; 9871; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_0: 9872; GFX940: ; %bb.0: 9873; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9874; GFX940-NEXT: ;;#ASMSTART 9875; GFX940-NEXT: ; def s[0:1] 9876; GFX940-NEXT: ;;#ASMEND 9877; GFX940-NEXT: ;;#ASMSTART 9878; GFX940-NEXT: ; def s[2:3] 9879; GFX940-NEXT: ;;#ASMEND 9880; GFX940-NEXT: s_lshr_b32 s1, s2, 16 9881; GFX940-NEXT: s_lshr_b32 s2, s3, 16 9882; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 9883; GFX940-NEXT: s_mov_b32 s9, s0 9884; GFX940-NEXT: ;;#ASMSTART 9885; GFX940-NEXT: ; use s[8:9] 9886; GFX940-NEXT: ;;#ASMEND 9887; GFX940-NEXT: s_setpc_b64 s[30:31] 9888 %vec0 = call <4 x half> asm "; def $0", "=s"() 9889 %vec1 = call <4 x half> asm "; def $0", "=s"() 9890 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 0> 9891 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9892 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9893 ret void 9894} 9895 9896define void @s_shuffle_v3f16_v4f16__7_6_0() { 9897; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_0: 9898; GFX900: ; %bb.0: 9899; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9900; GFX900-NEXT: ;;#ASMSTART 9901; GFX900-NEXT: ; def s[4:5] 9902; GFX900-NEXT: ;;#ASMEND 9903; GFX900-NEXT: ;;#ASMSTART 9904; GFX900-NEXT: ; def s[6:7] 9905; GFX900-NEXT: ;;#ASMEND 9906; GFX900-NEXT: s_lshr_b32 s5, s7, 16 9907; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 9908; GFX900-NEXT: s_mov_b32 s9, s4 9909; GFX900-NEXT: ;;#ASMSTART 9910; GFX900-NEXT: ; use s[8:9] 9911; GFX900-NEXT: ;;#ASMEND 9912; GFX900-NEXT: s_setpc_b64 s[30:31] 9913; 9914; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_0: 9915; GFX90A: ; %bb.0: 9916; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9917; GFX90A-NEXT: ;;#ASMSTART 9918; GFX90A-NEXT: ; def s[4:5] 9919; GFX90A-NEXT: ;;#ASMEND 9920; GFX90A-NEXT: ;;#ASMSTART 9921; GFX90A-NEXT: ; def s[6:7] 9922; GFX90A-NEXT: ;;#ASMEND 9923; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 9924; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 9925; GFX90A-NEXT: s_mov_b32 s9, s4 9926; GFX90A-NEXT: ;;#ASMSTART 9927; GFX90A-NEXT: ; use s[8:9] 9928; GFX90A-NEXT: ;;#ASMEND 9929; GFX90A-NEXT: s_setpc_b64 s[30:31] 9930; 9931; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_0: 9932; GFX940: ; %bb.0: 9933; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9934; GFX940-NEXT: ;;#ASMSTART 9935; GFX940-NEXT: ; def s[0:1] 9936; GFX940-NEXT: ;;#ASMEND 9937; GFX940-NEXT: ;;#ASMSTART 9938; GFX940-NEXT: ; def s[2:3] 9939; GFX940-NEXT: ;;#ASMEND 9940; GFX940-NEXT: s_lshr_b32 s1, s3, 16 9941; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 9942; GFX940-NEXT: s_mov_b32 s9, s0 9943; GFX940-NEXT: ;;#ASMSTART 9944; GFX940-NEXT: ; use s[8:9] 9945; GFX940-NEXT: ;;#ASMEND 9946; GFX940-NEXT: s_setpc_b64 s[30:31] 9947 %vec0 = call <4 x half> asm "; def $0", "=s"() 9948 %vec1 = call <4 x half> asm "; def $0", "=s"() 9949 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 0> 9950 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9951 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9952 ret void 9953} 9954 9955define void @s_shuffle_v3f16_v4f16__u_1_1() { 9956; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_1_1: 9957; GFX9: ; %bb.0: 9958; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9959; GFX9-NEXT: ;;#ASMSTART 9960; GFX9-NEXT: ; def s[8:9] 9961; GFX9-NEXT: ;;#ASMEND 9962; GFX9-NEXT: s_lshr_b32 s9, s8, 16 9963; GFX9-NEXT: ;;#ASMSTART 9964; GFX9-NEXT: ; use s[8:9] 9965; GFX9-NEXT: ;;#ASMEND 9966; GFX9-NEXT: s_setpc_b64 s[30:31] 9967 %vec0 = call <4 x half> asm "; def $0", "=s"() 9968 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 1, i32 1> 9969 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9970 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9971 ret void 9972} 9973 9974define void @s_shuffle_v3f16_v4f16__0_1_1() { 9975; GFX9-LABEL: s_shuffle_v3f16_v4f16__0_1_1: 9976; GFX9: ; %bb.0: 9977; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9978; GFX9-NEXT: ;;#ASMSTART 9979; GFX9-NEXT: ; def s[8:9] 9980; GFX9-NEXT: ;;#ASMEND 9981; GFX9-NEXT: s_lshr_b32 s9, s8, 16 9982; GFX9-NEXT: ;;#ASMSTART 9983; GFX9-NEXT: ; use s[8:9] 9984; GFX9-NEXT: ;;#ASMEND 9985; GFX9-NEXT: s_setpc_b64 s[30:31] 9986 %vec0 = call <4 x half> asm "; def $0", "=s"() 9987 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 1> 9988 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 9989 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 9990 ret void 9991} 9992 9993define void @s_shuffle_v3f16_v4f16__1_1_1() { 9994; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_1_1: 9995; GFX900: ; %bb.0: 9996; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 9997; GFX900-NEXT: ;;#ASMSTART 9998; GFX900-NEXT: ; def s[4:5] 9999; GFX900-NEXT: ;;#ASMEND 10000; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10001; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9 10002; GFX900-NEXT: ;;#ASMSTART 10003; GFX900-NEXT: ; use s[8:9] 10004; GFX900-NEXT: ;;#ASMEND 10005; GFX900-NEXT: s_setpc_b64 s[30:31] 10006; 10007; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_1_1: 10008; GFX90A: ; %bb.0: 10009; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10010; GFX90A-NEXT: ;;#ASMSTART 10011; GFX90A-NEXT: ; def s[4:5] 10012; GFX90A-NEXT: ;;#ASMEND 10013; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10014; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9 10015; GFX90A-NEXT: ;;#ASMSTART 10016; GFX90A-NEXT: ; use s[8:9] 10017; GFX90A-NEXT: ;;#ASMEND 10018; GFX90A-NEXT: s_setpc_b64 s[30:31] 10019; 10020; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_1_1: 10021; GFX940: ; %bb.0: 10022; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10023; GFX940-NEXT: ;;#ASMSTART 10024; GFX940-NEXT: ; def s[0:1] 10025; GFX940-NEXT: ;;#ASMEND 10026; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10027; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 10028; GFX940-NEXT: ;;#ASMSTART 10029; GFX940-NEXT: ; use s[8:9] 10030; GFX940-NEXT: ;;#ASMEND 10031; GFX940-NEXT: s_setpc_b64 s[30:31] 10032 %vec0 = call <4 x half> asm "; def $0", "=s"() 10033 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 1, i32 1> 10034 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10035 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10036 ret void 10037} 10038 10039define void @s_shuffle_v3f16_v4f16__2_1_1() { 10040; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_1_1: 10041; GFX900: ; %bb.0: 10042; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10043; GFX900-NEXT: ;;#ASMSTART 10044; GFX900-NEXT: ; def s[4:5] 10045; GFX900-NEXT: ;;#ASMEND 10046; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10047; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10048; GFX900-NEXT: ;;#ASMSTART 10049; GFX900-NEXT: ; use s[8:9] 10050; GFX900-NEXT: ;;#ASMEND 10051; GFX900-NEXT: s_setpc_b64 s[30:31] 10052; 10053; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_1_1: 10054; GFX90A: ; %bb.0: 10055; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10056; GFX90A-NEXT: ;;#ASMSTART 10057; GFX90A-NEXT: ; def s[4:5] 10058; GFX90A-NEXT: ;;#ASMEND 10059; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10060; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10061; GFX90A-NEXT: ;;#ASMSTART 10062; GFX90A-NEXT: ; use s[8:9] 10063; GFX90A-NEXT: ;;#ASMEND 10064; GFX90A-NEXT: s_setpc_b64 s[30:31] 10065; 10066; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_1_1: 10067; GFX940: ; %bb.0: 10068; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10069; GFX940-NEXT: ;;#ASMSTART 10070; GFX940-NEXT: ; def s[0:1] 10071; GFX940-NEXT: ;;#ASMEND 10072; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10073; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 10074; GFX940-NEXT: ;;#ASMSTART 10075; GFX940-NEXT: ; use s[8:9] 10076; GFX940-NEXT: ;;#ASMEND 10077; GFX940-NEXT: s_setpc_b64 s[30:31] 10078 %vec0 = call <4 x half> asm "; def $0", "=s"() 10079 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 1, i32 1> 10080 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10081 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10082 ret void 10083} 10084 10085define void @s_shuffle_v3f16_v4f16__3_1_1() { 10086; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_1_1: 10087; GFX900: ; %bb.0: 10088; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10089; GFX900-NEXT: ;;#ASMSTART 10090; GFX900-NEXT: ; def s[4:5] 10091; GFX900-NEXT: ;;#ASMEND 10092; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10093; GFX900-NEXT: s_lshr_b32 s4, s5, 16 10094; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10095; GFX900-NEXT: ;;#ASMSTART 10096; GFX900-NEXT: ; use s[8:9] 10097; GFX900-NEXT: ;;#ASMEND 10098; GFX900-NEXT: s_setpc_b64 s[30:31] 10099; 10100; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_1_1: 10101; GFX90A: ; %bb.0: 10102; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10103; GFX90A-NEXT: ;;#ASMSTART 10104; GFX90A-NEXT: ; def s[4:5] 10105; GFX90A-NEXT: ;;#ASMEND 10106; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10107; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 10108; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10109; GFX90A-NEXT: ;;#ASMSTART 10110; GFX90A-NEXT: ; use s[8:9] 10111; GFX90A-NEXT: ;;#ASMEND 10112; GFX90A-NEXT: s_setpc_b64 s[30:31] 10113; 10114; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_1_1: 10115; GFX940: ; %bb.0: 10116; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10117; GFX940-NEXT: ;;#ASMSTART 10118; GFX940-NEXT: ; def s[0:1] 10119; GFX940-NEXT: ;;#ASMEND 10120; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10121; GFX940-NEXT: s_lshr_b32 s0, s1, 16 10122; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10123; GFX940-NEXT: ;;#ASMSTART 10124; GFX940-NEXT: ; use s[8:9] 10125; GFX940-NEXT: ;;#ASMEND 10126; GFX940-NEXT: s_setpc_b64 s[30:31] 10127 %vec0 = call <4 x half> asm "; def $0", "=s"() 10128 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 1, i32 1> 10129 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10130 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10131 ret void 10132} 10133 10134define void @s_shuffle_v3f16_v4f16__4_1_1() { 10135; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_1_1: 10136; GFX9: ; %bb.0: 10137; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10138; GFX9-NEXT: ;;#ASMSTART 10139; GFX9-NEXT: ; def s[8:9] 10140; GFX9-NEXT: ;;#ASMEND 10141; GFX9-NEXT: s_lshr_b32 s9, s8, 16 10142; GFX9-NEXT: ;;#ASMSTART 10143; GFX9-NEXT: ; use s[8:9] 10144; GFX9-NEXT: ;;#ASMEND 10145; GFX9-NEXT: s_setpc_b64 s[30:31] 10146 %vec0 = call <4 x half> asm "; def $0", "=s"() 10147 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 1, i32 1> 10148 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10149 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10150 ret void 10151} 10152 10153define void @s_shuffle_v3f16_v4f16__5_1_1() { 10154; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_1_1: 10155; GFX900: ; %bb.0: 10156; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10157; GFX900-NEXT: ;;#ASMSTART 10158; GFX900-NEXT: ; def s[4:5] 10159; GFX900-NEXT: ;;#ASMEND 10160; GFX900-NEXT: ;;#ASMSTART 10161; GFX900-NEXT: ; def s[6:7] 10162; GFX900-NEXT: ;;#ASMEND 10163; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10164; GFX900-NEXT: s_lshr_b32 s4, s6, 16 10165; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10166; GFX900-NEXT: ;;#ASMSTART 10167; GFX900-NEXT: ; use s[8:9] 10168; GFX900-NEXT: ;;#ASMEND 10169; GFX900-NEXT: s_setpc_b64 s[30:31] 10170; 10171; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_1_1: 10172; GFX90A: ; %bb.0: 10173; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10174; GFX90A-NEXT: ;;#ASMSTART 10175; GFX90A-NEXT: ; def s[4:5] 10176; GFX90A-NEXT: ;;#ASMEND 10177; GFX90A-NEXT: ;;#ASMSTART 10178; GFX90A-NEXT: ; def s[6:7] 10179; GFX90A-NEXT: ;;#ASMEND 10180; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10181; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 10182; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10183; GFX90A-NEXT: ;;#ASMSTART 10184; GFX90A-NEXT: ; use s[8:9] 10185; GFX90A-NEXT: ;;#ASMEND 10186; GFX90A-NEXT: s_setpc_b64 s[30:31] 10187; 10188; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_1_1: 10189; GFX940: ; %bb.0: 10190; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10191; GFX940-NEXT: ;;#ASMSTART 10192; GFX940-NEXT: ; def s[0:1] 10193; GFX940-NEXT: ;;#ASMEND 10194; GFX940-NEXT: ;;#ASMSTART 10195; GFX940-NEXT: ; def s[2:3] 10196; GFX940-NEXT: ;;#ASMEND 10197; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10198; GFX940-NEXT: s_lshr_b32 s0, s2, 16 10199; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10200; GFX940-NEXT: ;;#ASMSTART 10201; GFX940-NEXT: ; use s[8:9] 10202; GFX940-NEXT: ;;#ASMEND 10203; GFX940-NEXT: s_setpc_b64 s[30:31] 10204 %vec0 = call <4 x half> asm "; def $0", "=s"() 10205 %vec1 = call <4 x half> asm "; def $0", "=s"() 10206 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 1, i32 1> 10207 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10208 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10209 ret void 10210} 10211 10212define void @s_shuffle_v3f16_v4f16__6_1_1() { 10213; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_1_1: 10214; GFX900: ; %bb.0: 10215; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10216; GFX900-NEXT: ;;#ASMSTART 10217; GFX900-NEXT: ; def s[4:5] 10218; GFX900-NEXT: ;;#ASMEND 10219; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10220; GFX900-NEXT: ;;#ASMSTART 10221; GFX900-NEXT: ; def s[6:7] 10222; GFX900-NEXT: ;;#ASMEND 10223; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9 10224; GFX900-NEXT: ;;#ASMSTART 10225; GFX900-NEXT: ; use s[8:9] 10226; GFX900-NEXT: ;;#ASMEND 10227; GFX900-NEXT: s_setpc_b64 s[30:31] 10228; 10229; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_1_1: 10230; GFX90A: ; %bb.0: 10231; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10232; GFX90A-NEXT: ;;#ASMSTART 10233; GFX90A-NEXT: ; def s[4:5] 10234; GFX90A-NEXT: ;;#ASMEND 10235; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10236; GFX90A-NEXT: ;;#ASMSTART 10237; GFX90A-NEXT: ; def s[6:7] 10238; GFX90A-NEXT: ;;#ASMEND 10239; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9 10240; GFX90A-NEXT: ;;#ASMSTART 10241; GFX90A-NEXT: ; use s[8:9] 10242; GFX90A-NEXT: ;;#ASMEND 10243; GFX90A-NEXT: s_setpc_b64 s[30:31] 10244; 10245; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_1_1: 10246; GFX940: ; %bb.0: 10247; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10248; GFX940-NEXT: ;;#ASMSTART 10249; GFX940-NEXT: ; def s[0:1] 10250; GFX940-NEXT: ;;#ASMEND 10251; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10252; GFX940-NEXT: ;;#ASMSTART 10253; GFX940-NEXT: ; def s[2:3] 10254; GFX940-NEXT: ;;#ASMEND 10255; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 10256; GFX940-NEXT: ;;#ASMSTART 10257; GFX940-NEXT: ; use s[8:9] 10258; GFX940-NEXT: ;;#ASMEND 10259; GFX940-NEXT: s_setpc_b64 s[30:31] 10260 %vec0 = call <4 x half> asm "; def $0", "=s"() 10261 %vec1 = call <4 x half> asm "; def $0", "=s"() 10262 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 1, i32 1> 10263 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10264 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10265 ret void 10266} 10267 10268define void @s_shuffle_v3f16_v4f16__7_1_1() { 10269; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_1: 10270; GFX900: ; %bb.0: 10271; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10272; GFX900-NEXT: ;;#ASMSTART 10273; GFX900-NEXT: ; def s[4:5] 10274; GFX900-NEXT: ;;#ASMEND 10275; GFX900-NEXT: ;;#ASMSTART 10276; GFX900-NEXT: ; def s[6:7] 10277; GFX900-NEXT: ;;#ASMEND 10278; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10279; GFX900-NEXT: s_lshr_b32 s4, s7, 16 10280; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10281; GFX900-NEXT: ;;#ASMSTART 10282; GFX900-NEXT: ; use s[8:9] 10283; GFX900-NEXT: ;;#ASMEND 10284; GFX900-NEXT: s_setpc_b64 s[30:31] 10285; 10286; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_1: 10287; GFX90A: ; %bb.0: 10288; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10289; GFX90A-NEXT: ;;#ASMSTART 10290; GFX90A-NEXT: ; def s[4:5] 10291; GFX90A-NEXT: ;;#ASMEND 10292; GFX90A-NEXT: ;;#ASMSTART 10293; GFX90A-NEXT: ; def s[6:7] 10294; GFX90A-NEXT: ;;#ASMEND 10295; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10296; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 10297; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10298; GFX90A-NEXT: ;;#ASMSTART 10299; GFX90A-NEXT: ; use s[8:9] 10300; GFX90A-NEXT: ;;#ASMEND 10301; GFX90A-NEXT: s_setpc_b64 s[30:31] 10302; 10303; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_1: 10304; GFX940: ; %bb.0: 10305; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10306; GFX940-NEXT: ;;#ASMSTART 10307; GFX940-NEXT: ; def s[0:1] 10308; GFX940-NEXT: ;;#ASMEND 10309; GFX940-NEXT: ;;#ASMSTART 10310; GFX940-NEXT: ; def s[2:3] 10311; GFX940-NEXT: ;;#ASMEND 10312; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10313; GFX940-NEXT: s_lshr_b32 s0, s3, 16 10314; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10315; GFX940-NEXT: ;;#ASMSTART 10316; GFX940-NEXT: ; use s[8:9] 10317; GFX940-NEXT: ;;#ASMEND 10318; GFX940-NEXT: s_setpc_b64 s[30:31] 10319 %vec0 = call <4 x half> asm "; def $0", "=s"() 10320 %vec1 = call <4 x half> asm "; def $0", "=s"() 10321 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 1> 10322 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10323 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10324 ret void 10325} 10326 10327define void @s_shuffle_v3f16_v4f16__7_u_1() { 10328; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_1: 10329; GFX900: ; %bb.0: 10330; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10331; GFX900-NEXT: ;;#ASMSTART 10332; GFX900-NEXT: ; def s[4:5] 10333; GFX900-NEXT: ;;#ASMEND 10334; GFX900-NEXT: ;;#ASMSTART 10335; GFX900-NEXT: ; def s[6:7] 10336; GFX900-NEXT: ;;#ASMEND 10337; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10338; GFX900-NEXT: s_lshr_b32 s8, s7, 16 10339; GFX900-NEXT: ;;#ASMSTART 10340; GFX900-NEXT: ; use s[8:9] 10341; GFX900-NEXT: ;;#ASMEND 10342; GFX900-NEXT: s_setpc_b64 s[30:31] 10343; 10344; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_1: 10345; GFX90A: ; %bb.0: 10346; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10347; GFX90A-NEXT: ;;#ASMSTART 10348; GFX90A-NEXT: ; def s[4:5] 10349; GFX90A-NEXT: ;;#ASMEND 10350; GFX90A-NEXT: ;;#ASMSTART 10351; GFX90A-NEXT: ; def s[6:7] 10352; GFX90A-NEXT: ;;#ASMEND 10353; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10354; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 10355; GFX90A-NEXT: ;;#ASMSTART 10356; GFX90A-NEXT: ; use s[8:9] 10357; GFX90A-NEXT: ;;#ASMEND 10358; GFX90A-NEXT: s_setpc_b64 s[30:31] 10359; 10360; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_1: 10361; GFX940: ; %bb.0: 10362; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10363; GFX940-NEXT: ;;#ASMSTART 10364; GFX940-NEXT: ; def s[0:1] 10365; GFX940-NEXT: ;;#ASMEND 10366; GFX940-NEXT: ;;#ASMSTART 10367; GFX940-NEXT: ; def s[2:3] 10368; GFX940-NEXT: ;;#ASMEND 10369; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10370; GFX940-NEXT: s_lshr_b32 s8, s3, 16 10371; GFX940-NEXT: ;;#ASMSTART 10372; GFX940-NEXT: ; use s[8:9] 10373; GFX940-NEXT: ;;#ASMEND 10374; GFX940-NEXT: s_setpc_b64 s[30:31] 10375 %vec0 = call <4 x half> asm "; def $0", "=s"() 10376 %vec1 = call <4 x half> asm "; def $0", "=s"() 10377 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 1> 10378 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10379 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10380 ret void 10381} 10382 10383define void @s_shuffle_v3f16_v4f16__7_0_1() { 10384; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_1: 10385; GFX900: ; %bb.0: 10386; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10387; GFX900-NEXT: ;;#ASMSTART 10388; GFX900-NEXT: ; def s[4:5] 10389; GFX900-NEXT: ;;#ASMEND 10390; GFX900-NEXT: ;;#ASMSTART 10391; GFX900-NEXT: ; def s[6:7] 10392; GFX900-NEXT: ;;#ASMEND 10393; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10394; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 10395; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10396; GFX900-NEXT: ;;#ASMSTART 10397; GFX900-NEXT: ; use s[8:9] 10398; GFX900-NEXT: ;;#ASMEND 10399; GFX900-NEXT: s_setpc_b64 s[30:31] 10400; 10401; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_1: 10402; GFX90A: ; %bb.0: 10403; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10404; GFX90A-NEXT: ;;#ASMSTART 10405; GFX90A-NEXT: ; def s[4:5] 10406; GFX90A-NEXT: ;;#ASMEND 10407; GFX90A-NEXT: ;;#ASMSTART 10408; GFX90A-NEXT: ; def s[6:7] 10409; GFX90A-NEXT: ;;#ASMEND 10410; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 10411; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 10412; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10413; GFX90A-NEXT: ;;#ASMSTART 10414; GFX90A-NEXT: ; use s[8:9] 10415; GFX90A-NEXT: ;;#ASMEND 10416; GFX90A-NEXT: s_setpc_b64 s[30:31] 10417; 10418; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_1: 10419; GFX940: ; %bb.0: 10420; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10421; GFX940-NEXT: ;;#ASMSTART 10422; GFX940-NEXT: ; def s[0:1] 10423; GFX940-NEXT: ;;#ASMEND 10424; GFX940-NEXT: ;;#ASMSTART 10425; GFX940-NEXT: ; def s[2:3] 10426; GFX940-NEXT: ;;#ASMEND 10427; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10428; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 10429; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10430; GFX940-NEXT: ;;#ASMSTART 10431; GFX940-NEXT: ; use s[8:9] 10432; GFX940-NEXT: ;;#ASMEND 10433; GFX940-NEXT: s_setpc_b64 s[30:31] 10434 %vec0 = call <4 x half> asm "; def $0", "=s"() 10435 %vec1 = call <4 x half> asm "; def $0", "=s"() 10436 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 1> 10437 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10438 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10439 ret void 10440} 10441 10442define void @s_shuffle_v3f16_v4f16__7_2_1() { 10443; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_1: 10444; GFX900: ; %bb.0: 10445; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10446; GFX900-NEXT: ;;#ASMSTART 10447; GFX900-NEXT: ; def s[6:7] 10448; GFX900-NEXT: ;;#ASMEND 10449; GFX900-NEXT: s_lshr_b32 s6, s7, 16 10450; GFX900-NEXT: ;;#ASMSTART 10451; GFX900-NEXT: ; def s[4:5] 10452; GFX900-NEXT: ;;#ASMEND 10453; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10454; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10455; GFX900-NEXT: ;;#ASMSTART 10456; GFX900-NEXT: ; use s[8:9] 10457; GFX900-NEXT: ;;#ASMEND 10458; GFX900-NEXT: s_setpc_b64 s[30:31] 10459; 10460; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_1: 10461; GFX90A: ; %bb.0: 10462; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10463; GFX90A-NEXT: ;;#ASMSTART 10464; GFX90A-NEXT: ; def s[6:7] 10465; GFX90A-NEXT: ;;#ASMEND 10466; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 10467; GFX90A-NEXT: ;;#ASMSTART 10468; GFX90A-NEXT: ; def s[4:5] 10469; GFX90A-NEXT: ;;#ASMEND 10470; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10471; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10472; GFX90A-NEXT: ;;#ASMSTART 10473; GFX90A-NEXT: ; use s[8:9] 10474; GFX90A-NEXT: ;;#ASMEND 10475; GFX90A-NEXT: s_setpc_b64 s[30:31] 10476; 10477; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_1: 10478; GFX940: ; %bb.0: 10479; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10480; GFX940-NEXT: ;;#ASMSTART 10481; GFX940-NEXT: ; def s[2:3] 10482; GFX940-NEXT: ;;#ASMEND 10483; GFX940-NEXT: s_lshr_b32 s2, s3, 16 10484; GFX940-NEXT: ;;#ASMSTART 10485; GFX940-NEXT: ; def s[0:1] 10486; GFX940-NEXT: ;;#ASMEND 10487; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 10488; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10489; GFX940-NEXT: ;;#ASMSTART 10490; GFX940-NEXT: ; use s[8:9] 10491; GFX940-NEXT: ;;#ASMEND 10492; GFX940-NEXT: s_setpc_b64 s[30:31] 10493 %vec0 = call <4 x half> asm "; def $0", "=s"() 10494 %vec1 = call <4 x half> asm "; def $0", "=s"() 10495 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 1> 10496 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10497 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10498 ret void 10499} 10500 10501define void @s_shuffle_v3f16_v4f16__7_3_1() { 10502; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_1: 10503; GFX900: ; %bb.0: 10504; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10505; GFX900-NEXT: ;;#ASMSTART 10506; GFX900-NEXT: ; def s[4:5] 10507; GFX900-NEXT: ;;#ASMEND 10508; GFX900-NEXT: ;;#ASMSTART 10509; GFX900-NEXT: ; def s[6:7] 10510; GFX900-NEXT: ;;#ASMEND 10511; GFX900-NEXT: s_lshr_b32 s5, s5, 16 10512; GFX900-NEXT: s_lshr_b32 s6, s7, 16 10513; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10514; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10515; GFX900-NEXT: ;;#ASMSTART 10516; GFX900-NEXT: ; use s[8:9] 10517; GFX900-NEXT: ;;#ASMEND 10518; GFX900-NEXT: s_setpc_b64 s[30:31] 10519; 10520; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_1: 10521; GFX90A: ; %bb.0: 10522; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10523; GFX90A-NEXT: ;;#ASMSTART 10524; GFX90A-NEXT: ; def s[4:5] 10525; GFX90A-NEXT: ;;#ASMEND 10526; GFX90A-NEXT: ;;#ASMSTART 10527; GFX90A-NEXT: ; def s[6:7] 10528; GFX90A-NEXT: ;;#ASMEND 10529; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 10530; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 10531; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10532; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10533; GFX90A-NEXT: ;;#ASMSTART 10534; GFX90A-NEXT: ; use s[8:9] 10535; GFX90A-NEXT: ;;#ASMEND 10536; GFX90A-NEXT: s_setpc_b64 s[30:31] 10537; 10538; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_1: 10539; GFX940: ; %bb.0: 10540; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10541; GFX940-NEXT: ;;#ASMSTART 10542; GFX940-NEXT: ; def s[0:1] 10543; GFX940-NEXT: ;;#ASMEND 10544; GFX940-NEXT: ;;#ASMSTART 10545; GFX940-NEXT: ; def s[2:3] 10546; GFX940-NEXT: ;;#ASMEND 10547; GFX940-NEXT: s_lshr_b32 s1, s1, 16 10548; GFX940-NEXT: s_lshr_b32 s2, s3, 16 10549; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 10550; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10551; GFX940-NEXT: ;;#ASMSTART 10552; GFX940-NEXT: ; use s[8:9] 10553; GFX940-NEXT: ;;#ASMEND 10554; GFX940-NEXT: s_setpc_b64 s[30:31] 10555 %vec0 = call <4 x half> asm "; def $0", "=s"() 10556 %vec1 = call <4 x half> asm "; def $0", "=s"() 10557 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 1> 10558 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10559 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10560 ret void 10561} 10562 10563define void @s_shuffle_v3f16_v4f16__7_4_1() { 10564; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_1: 10565; GFX900: ; %bb.0: 10566; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10567; GFX900-NEXT: ;;#ASMSTART 10568; GFX900-NEXT: ; def s[4:5] 10569; GFX900-NEXT: ;;#ASMEND 10570; GFX900-NEXT: ;;#ASMSTART 10571; GFX900-NEXT: ; def s[6:7] 10572; GFX900-NEXT: ;;#ASMEND 10573; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10574; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 10575; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10576; GFX900-NEXT: ;;#ASMSTART 10577; GFX900-NEXT: ; use s[8:9] 10578; GFX900-NEXT: ;;#ASMEND 10579; GFX900-NEXT: s_setpc_b64 s[30:31] 10580; 10581; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_1: 10582; GFX90A: ; %bb.0: 10583; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10584; GFX90A-NEXT: ;;#ASMSTART 10585; GFX90A-NEXT: ; def s[4:5] 10586; GFX90A-NEXT: ;;#ASMEND 10587; GFX90A-NEXT: ;;#ASMSTART 10588; GFX90A-NEXT: ; def s[6:7] 10589; GFX90A-NEXT: ;;#ASMEND 10590; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 10591; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 10592; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10593; GFX90A-NEXT: ;;#ASMSTART 10594; GFX90A-NEXT: ; use s[8:9] 10595; GFX90A-NEXT: ;;#ASMEND 10596; GFX90A-NEXT: s_setpc_b64 s[30:31] 10597; 10598; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_1: 10599; GFX940: ; %bb.0: 10600; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10601; GFX940-NEXT: ;;#ASMSTART 10602; GFX940-NEXT: ; def s[0:1] 10603; GFX940-NEXT: ;;#ASMEND 10604; GFX940-NEXT: ;;#ASMSTART 10605; GFX940-NEXT: ; def s[2:3] 10606; GFX940-NEXT: ;;#ASMEND 10607; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10608; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 10609; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10610; GFX940-NEXT: ;;#ASMSTART 10611; GFX940-NEXT: ; use s[8:9] 10612; GFX940-NEXT: ;;#ASMEND 10613; GFX940-NEXT: s_setpc_b64 s[30:31] 10614 %vec0 = call <4 x half> asm "; def $0", "=s"() 10615 %vec1 = call <4 x half> asm "; def $0", "=s"() 10616 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 1> 10617 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10618 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10619 ret void 10620} 10621 10622define void @s_shuffle_v3f16_v4f16__7_5_1() { 10623; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_1: 10624; GFX900: ; %bb.0: 10625; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10626; GFX900-NEXT: ;;#ASMSTART 10627; GFX900-NEXT: ; def s[4:5] 10628; GFX900-NEXT: ;;#ASMEND 10629; GFX900-NEXT: ;;#ASMSTART 10630; GFX900-NEXT: ; def s[6:7] 10631; GFX900-NEXT: ;;#ASMEND 10632; GFX900-NEXT: s_lshr_b32 s5, s6, 16 10633; GFX900-NEXT: s_lshr_b32 s6, s7, 16 10634; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10635; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10636; GFX900-NEXT: ;;#ASMSTART 10637; GFX900-NEXT: ; use s[8:9] 10638; GFX900-NEXT: ;;#ASMEND 10639; GFX900-NEXT: s_setpc_b64 s[30:31] 10640; 10641; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_1: 10642; GFX90A: ; %bb.0: 10643; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10644; GFX90A-NEXT: ;;#ASMSTART 10645; GFX90A-NEXT: ; def s[4:5] 10646; GFX90A-NEXT: ;;#ASMEND 10647; GFX90A-NEXT: ;;#ASMSTART 10648; GFX90A-NEXT: ; def s[6:7] 10649; GFX90A-NEXT: ;;#ASMEND 10650; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 10651; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 10652; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 10653; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10654; GFX90A-NEXT: ;;#ASMSTART 10655; GFX90A-NEXT: ; use s[8:9] 10656; GFX90A-NEXT: ;;#ASMEND 10657; GFX90A-NEXT: s_setpc_b64 s[30:31] 10658; 10659; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_1: 10660; GFX940: ; %bb.0: 10661; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10662; GFX940-NEXT: ;;#ASMSTART 10663; GFX940-NEXT: ; def s[0:1] 10664; GFX940-NEXT: ;;#ASMEND 10665; GFX940-NEXT: ;;#ASMSTART 10666; GFX940-NEXT: ; def s[2:3] 10667; GFX940-NEXT: ;;#ASMEND 10668; GFX940-NEXT: s_lshr_b32 s1, s2, 16 10669; GFX940-NEXT: s_lshr_b32 s2, s3, 16 10670; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 10671; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10672; GFX940-NEXT: ;;#ASMSTART 10673; GFX940-NEXT: ; use s[8:9] 10674; GFX940-NEXT: ;;#ASMEND 10675; GFX940-NEXT: s_setpc_b64 s[30:31] 10676 %vec0 = call <4 x half> asm "; def $0", "=s"() 10677 %vec1 = call <4 x half> asm "; def $0", "=s"() 10678 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 1> 10679 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10680 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10681 ret void 10682} 10683 10684define void @s_shuffle_v3f16_v4f16__7_6_1() { 10685; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_1: 10686; GFX900: ; %bb.0: 10687; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10688; GFX900-NEXT: ;;#ASMSTART 10689; GFX900-NEXT: ; def s[4:5] 10690; GFX900-NEXT: ;;#ASMEND 10691; GFX900-NEXT: ;;#ASMSTART 10692; GFX900-NEXT: ; def s[6:7] 10693; GFX900-NEXT: ;;#ASMEND 10694; GFX900-NEXT: s_lshr_b32 s5, s7, 16 10695; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 10696; GFX900-NEXT: s_lshr_b32 s9, s4, 16 10697; GFX900-NEXT: ;;#ASMSTART 10698; GFX900-NEXT: ; use s[8:9] 10699; GFX900-NEXT: ;;#ASMEND 10700; GFX900-NEXT: s_setpc_b64 s[30:31] 10701; 10702; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_1: 10703; GFX90A: ; %bb.0: 10704; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10705; GFX90A-NEXT: ;;#ASMSTART 10706; GFX90A-NEXT: ; def s[4:5] 10707; GFX90A-NEXT: ;;#ASMEND 10708; GFX90A-NEXT: ;;#ASMSTART 10709; GFX90A-NEXT: ; def s[6:7] 10710; GFX90A-NEXT: ;;#ASMEND 10711; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 10712; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 10713; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 10714; GFX90A-NEXT: ;;#ASMSTART 10715; GFX90A-NEXT: ; use s[8:9] 10716; GFX90A-NEXT: ;;#ASMEND 10717; GFX90A-NEXT: s_setpc_b64 s[30:31] 10718; 10719; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_1: 10720; GFX940: ; %bb.0: 10721; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10722; GFX940-NEXT: ;;#ASMSTART 10723; GFX940-NEXT: ; def s[0:1] 10724; GFX940-NEXT: ;;#ASMEND 10725; GFX940-NEXT: ;;#ASMSTART 10726; GFX940-NEXT: ; def s[2:3] 10727; GFX940-NEXT: ;;#ASMEND 10728; GFX940-NEXT: s_lshr_b32 s1, s3, 16 10729; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 10730; GFX940-NEXT: s_lshr_b32 s9, s0, 16 10731; GFX940-NEXT: ;;#ASMSTART 10732; GFX940-NEXT: ; use s[8:9] 10733; GFX940-NEXT: ;;#ASMEND 10734; GFX940-NEXT: s_setpc_b64 s[30:31] 10735 %vec0 = call <4 x half> asm "; def $0", "=s"() 10736 %vec1 = call <4 x half> asm "; def $0", "=s"() 10737 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 1> 10738 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10739 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10740 ret void 10741} 10742 10743define void @s_shuffle_v3f16_v4f16__u_2_2() { 10744; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_2_2: 10745; GFX9: ; %bb.0: 10746; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10747; GFX9-NEXT: ;;#ASMSTART 10748; GFX9-NEXT: ; def s[8:9] 10749; GFX9-NEXT: ;;#ASMEND 10750; GFX9-NEXT: s_lshl_b32 s8, s9, 16 10751; GFX9-NEXT: ;;#ASMSTART 10752; GFX9-NEXT: ; use s[8:9] 10753; GFX9-NEXT: ;;#ASMEND 10754; GFX9-NEXT: s_setpc_b64 s[30:31] 10755 %vec0 = call <4 x half> asm "; def $0", "=s"() 10756 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 2, i32 2> 10757 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10758 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10759 ret void 10760} 10761 10762define void @s_shuffle_v3f16_v4f16__0_2_2() { 10763; GFX9-LABEL: s_shuffle_v3f16_v4f16__0_2_2: 10764; GFX9: ; %bb.0: 10765; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10766; GFX9-NEXT: ;;#ASMSTART 10767; GFX9-NEXT: ; def s[8:9] 10768; GFX9-NEXT: ;;#ASMEND 10769; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9 10770; GFX9-NEXT: ;;#ASMSTART 10771; GFX9-NEXT: ; use s[8:9] 10772; GFX9-NEXT: ;;#ASMEND 10773; GFX9-NEXT: s_setpc_b64 s[30:31] 10774 %vec0 = call <4 x half> asm "; def $0", "=s"() 10775 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 2, i32 2> 10776 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10777 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10778 ret void 10779} 10780 10781define void @s_shuffle_v3f16_v4f16__1_2_2() { 10782; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_2_2: 10783; GFX900: ; %bb.0: 10784; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10785; GFX900-NEXT: ;;#ASMSTART 10786; GFX900-NEXT: ; def s[8:9] 10787; GFX900-NEXT: ;;#ASMEND 10788; GFX900-NEXT: s_lshr_b32 s4, s8, 16 10789; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10790; GFX900-NEXT: ;;#ASMSTART 10791; GFX900-NEXT: ; use s[8:9] 10792; GFX900-NEXT: ;;#ASMEND 10793; GFX900-NEXT: s_setpc_b64 s[30:31] 10794; 10795; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_2_2: 10796; GFX90A: ; %bb.0: 10797; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10798; GFX90A-NEXT: ;;#ASMSTART 10799; GFX90A-NEXT: ; def s[8:9] 10800; GFX90A-NEXT: ;;#ASMEND 10801; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 10802; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10803; GFX90A-NEXT: ;;#ASMSTART 10804; GFX90A-NEXT: ; use s[8:9] 10805; GFX90A-NEXT: ;;#ASMEND 10806; GFX90A-NEXT: s_setpc_b64 s[30:31] 10807; 10808; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_2_2: 10809; GFX940: ; %bb.0: 10810; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10811; GFX940-NEXT: ;;#ASMSTART 10812; GFX940-NEXT: ; def s[8:9] 10813; GFX940-NEXT: ;;#ASMEND 10814; GFX940-NEXT: s_lshr_b32 s0, s8, 16 10815; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10816; GFX940-NEXT: ;;#ASMSTART 10817; GFX940-NEXT: ; use s[8:9] 10818; GFX940-NEXT: ;;#ASMEND 10819; GFX940-NEXT: s_setpc_b64 s[30:31] 10820 %vec0 = call <4 x half> asm "; def $0", "=s"() 10821 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 2, i32 2> 10822 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10823 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10824 ret void 10825} 10826 10827define void @s_shuffle_v3f16_v4f16__2_2_2() { 10828; GFX9-LABEL: s_shuffle_v3f16_v4f16__2_2_2: 10829; GFX9: ; %bb.0: 10830; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10831; GFX9-NEXT: ;;#ASMSTART 10832; GFX9-NEXT: ; def s[8:9] 10833; GFX9-NEXT: ;;#ASMEND 10834; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 10835; GFX9-NEXT: ;;#ASMSTART 10836; GFX9-NEXT: ; use s[8:9] 10837; GFX9-NEXT: ;;#ASMEND 10838; GFX9-NEXT: s_setpc_b64 s[30:31] 10839 %vec0 = call <4 x half> asm "; def $0", "=s"() 10840 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 2, i32 2> 10841 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10842 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10843 ret void 10844} 10845 10846define void @s_shuffle_v3f16_v4f16__3_2_2() { 10847; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_2_2: 10848; GFX900: ; %bb.0: 10849; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10850; GFX900-NEXT: ;;#ASMSTART 10851; GFX900-NEXT: ; def s[8:9] 10852; GFX900-NEXT: ;;#ASMEND 10853; GFX900-NEXT: s_lshr_b32 s4, s9, 16 10854; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10855; GFX900-NEXT: ;;#ASMSTART 10856; GFX900-NEXT: ; use s[8:9] 10857; GFX900-NEXT: ;;#ASMEND 10858; GFX900-NEXT: s_setpc_b64 s[30:31] 10859; 10860; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_2_2: 10861; GFX90A: ; %bb.0: 10862; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10863; GFX90A-NEXT: ;;#ASMSTART 10864; GFX90A-NEXT: ; def s[8:9] 10865; GFX90A-NEXT: ;;#ASMEND 10866; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 10867; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10868; GFX90A-NEXT: ;;#ASMSTART 10869; GFX90A-NEXT: ; use s[8:9] 10870; GFX90A-NEXT: ;;#ASMEND 10871; GFX90A-NEXT: s_setpc_b64 s[30:31] 10872; 10873; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_2_2: 10874; GFX940: ; %bb.0: 10875; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10876; GFX940-NEXT: ;;#ASMSTART 10877; GFX940-NEXT: ; def s[8:9] 10878; GFX940-NEXT: ;;#ASMEND 10879; GFX940-NEXT: s_lshr_b32 s0, s9, 16 10880; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10881; GFX940-NEXT: ;;#ASMSTART 10882; GFX940-NEXT: ; use s[8:9] 10883; GFX940-NEXT: ;;#ASMEND 10884; GFX940-NEXT: s_setpc_b64 s[30:31] 10885 %vec0 = call <4 x half> asm "; def $0", "=s"() 10886 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 2, i32 2> 10887 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10888 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10889 ret void 10890} 10891 10892define void @s_shuffle_v3f16_v4f16__4_2_2() { 10893; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_2_2: 10894; GFX9: ; %bb.0: 10895; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10896; GFX9-NEXT: ;;#ASMSTART 10897; GFX9-NEXT: ; def s[8:9] 10898; GFX9-NEXT: ;;#ASMEND 10899; GFX9-NEXT: s_lshl_b32 s8, s9, 16 10900; GFX9-NEXT: ;;#ASMSTART 10901; GFX9-NEXT: ; use s[8:9] 10902; GFX9-NEXT: ;;#ASMEND 10903; GFX9-NEXT: s_setpc_b64 s[30:31] 10904 %vec0 = call <4 x half> asm "; def $0", "=s"() 10905 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 2, i32 2> 10906 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10907 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10908 ret void 10909} 10910 10911define void @s_shuffle_v3f16_v4f16__5_2_2() { 10912; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_2_2: 10913; GFX900: ; %bb.0: 10914; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10915; GFX900-NEXT: ;;#ASMSTART 10916; GFX900-NEXT: ; def s[4:5] 10917; GFX900-NEXT: ;;#ASMEND 10918; GFX900-NEXT: ;;#ASMSTART 10919; GFX900-NEXT: ; def s[8:9] 10920; GFX900-NEXT: ;;#ASMEND 10921; GFX900-NEXT: s_lshr_b32 s4, s4, 16 10922; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10923; GFX900-NEXT: ;;#ASMSTART 10924; GFX900-NEXT: ; use s[8:9] 10925; GFX900-NEXT: ;;#ASMEND 10926; GFX900-NEXT: s_setpc_b64 s[30:31] 10927; 10928; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_2_2: 10929; GFX90A: ; %bb.0: 10930; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10931; GFX90A-NEXT: ;;#ASMSTART 10932; GFX90A-NEXT: ; def s[4:5] 10933; GFX90A-NEXT: ;;#ASMEND 10934; GFX90A-NEXT: ;;#ASMSTART 10935; GFX90A-NEXT: ; def s[8:9] 10936; GFX90A-NEXT: ;;#ASMEND 10937; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 10938; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 10939; GFX90A-NEXT: ;;#ASMSTART 10940; GFX90A-NEXT: ; use s[8:9] 10941; GFX90A-NEXT: ;;#ASMEND 10942; GFX90A-NEXT: s_setpc_b64 s[30:31] 10943; 10944; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_2_2: 10945; GFX940: ; %bb.0: 10946; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10947; GFX940-NEXT: ;;#ASMSTART 10948; GFX940-NEXT: ; def s[0:1] 10949; GFX940-NEXT: ;;#ASMEND 10950; GFX940-NEXT: ;;#ASMSTART 10951; GFX940-NEXT: ; def s[8:9] 10952; GFX940-NEXT: ;;#ASMEND 10953; GFX940-NEXT: s_lshr_b32 s0, s0, 16 10954; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 10955; GFX940-NEXT: ;;#ASMSTART 10956; GFX940-NEXT: ; use s[8:9] 10957; GFX940-NEXT: ;;#ASMEND 10958; GFX940-NEXT: s_setpc_b64 s[30:31] 10959 %vec0 = call <4 x half> asm "; def $0", "=s"() 10960 %vec1 = call <4 x half> asm "; def $0", "=s"() 10961 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 2, i32 2> 10962 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 10963 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 10964 ret void 10965} 10966 10967define void @s_shuffle_v3f16_v4f16__6_2_2() { 10968; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_2_2: 10969; GFX900: ; %bb.0: 10970; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10971; GFX900-NEXT: ;;#ASMSTART 10972; GFX900-NEXT: ; def s[8:9] 10973; GFX900-NEXT: ;;#ASMEND 10974; GFX900-NEXT: ;;#ASMSTART 10975; GFX900-NEXT: ; def s[4:5] 10976; GFX900-NEXT: ;;#ASMEND 10977; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10978; GFX900-NEXT: ;;#ASMSTART 10979; GFX900-NEXT: ; use s[8:9] 10980; GFX900-NEXT: ;;#ASMEND 10981; GFX900-NEXT: s_setpc_b64 s[30:31] 10982; 10983; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_2_2: 10984; GFX90A: ; %bb.0: 10985; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 10986; GFX90A-NEXT: ;;#ASMSTART 10987; GFX90A-NEXT: ; def s[8:9] 10988; GFX90A-NEXT: ;;#ASMEND 10989; GFX90A-NEXT: ;;#ASMSTART 10990; GFX90A-NEXT: ; def s[4:5] 10991; GFX90A-NEXT: ;;#ASMEND 10992; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 10993; GFX90A-NEXT: ;;#ASMSTART 10994; GFX90A-NEXT: ; use s[8:9] 10995; GFX90A-NEXT: ;;#ASMEND 10996; GFX90A-NEXT: s_setpc_b64 s[30:31] 10997; 10998; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_2_2: 10999; GFX940: ; %bb.0: 11000; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11001; GFX940-NEXT: ;;#ASMSTART 11002; GFX940-NEXT: ; def s[8:9] 11003; GFX940-NEXT: ;;#ASMEND 11004; GFX940-NEXT: ;;#ASMSTART 11005; GFX940-NEXT: ; def s[0:1] 11006; GFX940-NEXT: ;;#ASMEND 11007; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 11008; GFX940-NEXT: ;;#ASMSTART 11009; GFX940-NEXT: ; use s[8:9] 11010; GFX940-NEXT: ;;#ASMEND 11011; GFX940-NEXT: s_setpc_b64 s[30:31] 11012 %vec0 = call <4 x half> asm "; def $0", "=s"() 11013 %vec1 = call <4 x half> asm "; def $0", "=s"() 11014 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 2, i32 2> 11015 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11016 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11017 ret void 11018} 11019 11020define void @s_shuffle_v3f16_v4f16__7_2_2() { 11021; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_2: 11022; GFX900: ; %bb.0: 11023; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11024; GFX900-NEXT: ;;#ASMSTART 11025; GFX900-NEXT: ; def s[4:5] 11026; GFX900-NEXT: ;;#ASMEND 11027; GFX900-NEXT: ;;#ASMSTART 11028; GFX900-NEXT: ; def s[8:9] 11029; GFX900-NEXT: ;;#ASMEND 11030; GFX900-NEXT: s_lshr_b32 s4, s5, 16 11031; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11032; GFX900-NEXT: ;;#ASMSTART 11033; GFX900-NEXT: ; use s[8:9] 11034; GFX900-NEXT: ;;#ASMEND 11035; GFX900-NEXT: s_setpc_b64 s[30:31] 11036; 11037; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_2: 11038; GFX90A: ; %bb.0: 11039; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11040; GFX90A-NEXT: ;;#ASMSTART 11041; GFX90A-NEXT: ; def s[4:5] 11042; GFX90A-NEXT: ;;#ASMEND 11043; GFX90A-NEXT: ;;#ASMSTART 11044; GFX90A-NEXT: ; def s[8:9] 11045; GFX90A-NEXT: ;;#ASMEND 11046; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 11047; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11048; GFX90A-NEXT: ;;#ASMSTART 11049; GFX90A-NEXT: ; use s[8:9] 11050; GFX90A-NEXT: ;;#ASMEND 11051; GFX90A-NEXT: s_setpc_b64 s[30:31] 11052; 11053; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_2: 11054; GFX940: ; %bb.0: 11055; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11056; GFX940-NEXT: ;;#ASMSTART 11057; GFX940-NEXT: ; def s[0:1] 11058; GFX940-NEXT: ;;#ASMEND 11059; GFX940-NEXT: ;;#ASMSTART 11060; GFX940-NEXT: ; def s[8:9] 11061; GFX940-NEXT: ;;#ASMEND 11062; GFX940-NEXT: s_lshr_b32 s0, s1, 16 11063; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 11064; GFX940-NEXT: ;;#ASMSTART 11065; GFX940-NEXT: ; use s[8:9] 11066; GFX940-NEXT: ;;#ASMEND 11067; GFX940-NEXT: s_setpc_b64 s[30:31] 11068 %vec0 = call <4 x half> asm "; def $0", "=s"() 11069 %vec1 = call <4 x half> asm "; def $0", "=s"() 11070 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 2> 11071 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11072 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11073 ret void 11074} 11075 11076define void @s_shuffle_v3f16_v4f16__7_u_2() { 11077; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_2: 11078; GFX900: ; %bb.0: 11079; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11080; GFX900-NEXT: ;;#ASMSTART 11081; GFX900-NEXT: ; def s[8:9] 11082; GFX900-NEXT: ;;#ASMEND 11083; GFX900-NEXT: ;;#ASMSTART 11084; GFX900-NEXT: ; def s[4:5] 11085; GFX900-NEXT: ;;#ASMEND 11086; GFX900-NEXT: s_lshr_b32 s8, s5, 16 11087; GFX900-NEXT: ;;#ASMSTART 11088; GFX900-NEXT: ; use s[8:9] 11089; GFX900-NEXT: ;;#ASMEND 11090; GFX900-NEXT: s_setpc_b64 s[30:31] 11091; 11092; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_2: 11093; GFX90A: ; %bb.0: 11094; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11095; GFX90A-NEXT: ;;#ASMSTART 11096; GFX90A-NEXT: ; def s[8:9] 11097; GFX90A-NEXT: ;;#ASMEND 11098; GFX90A-NEXT: ;;#ASMSTART 11099; GFX90A-NEXT: ; def s[4:5] 11100; GFX90A-NEXT: ;;#ASMEND 11101; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 11102; GFX90A-NEXT: ;;#ASMSTART 11103; GFX90A-NEXT: ; use s[8:9] 11104; GFX90A-NEXT: ;;#ASMEND 11105; GFX90A-NEXT: s_setpc_b64 s[30:31] 11106; 11107; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_2: 11108; GFX940: ; %bb.0: 11109; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11110; GFX940-NEXT: ;;#ASMSTART 11111; GFX940-NEXT: ; def s[8:9] 11112; GFX940-NEXT: ;;#ASMEND 11113; GFX940-NEXT: ;;#ASMSTART 11114; GFX940-NEXT: ; def s[0:1] 11115; GFX940-NEXT: ;;#ASMEND 11116; GFX940-NEXT: s_lshr_b32 s8, s1, 16 11117; GFX940-NEXT: ;;#ASMSTART 11118; GFX940-NEXT: ; use s[8:9] 11119; GFX940-NEXT: ;;#ASMEND 11120; GFX940-NEXT: s_setpc_b64 s[30:31] 11121 %vec0 = call <4 x half> asm "; def $0", "=s"() 11122 %vec1 = call <4 x half> asm "; def $0", "=s"() 11123 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 2> 11124 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11125 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11126 ret void 11127} 11128 11129define void @s_shuffle_v3f16_v4f16__7_0_2() { 11130; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_2: 11131; GFX900: ; %bb.0: 11132; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11133; GFX900-NEXT: ;;#ASMSTART 11134; GFX900-NEXT: ; def s[4:5] 11135; GFX900-NEXT: ;;#ASMEND 11136; GFX900-NEXT: ;;#ASMSTART 11137; GFX900-NEXT: ; def s[8:9] 11138; GFX900-NEXT: ;;#ASMEND 11139; GFX900-NEXT: s_lshr_b32 s4, s5, 16 11140; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8 11141; GFX900-NEXT: ;;#ASMSTART 11142; GFX900-NEXT: ; use s[8:9] 11143; GFX900-NEXT: ;;#ASMEND 11144; GFX900-NEXT: s_setpc_b64 s[30:31] 11145; 11146; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_2: 11147; GFX90A: ; %bb.0: 11148; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11149; GFX90A-NEXT: ;;#ASMSTART 11150; GFX90A-NEXT: ; def s[4:5] 11151; GFX90A-NEXT: ;;#ASMEND 11152; GFX90A-NEXT: ;;#ASMSTART 11153; GFX90A-NEXT: ; def s[8:9] 11154; GFX90A-NEXT: ;;#ASMEND 11155; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 11156; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8 11157; GFX90A-NEXT: ;;#ASMSTART 11158; GFX90A-NEXT: ; use s[8:9] 11159; GFX90A-NEXT: ;;#ASMEND 11160; GFX90A-NEXT: s_setpc_b64 s[30:31] 11161; 11162; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_2: 11163; GFX940: ; %bb.0: 11164; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11165; GFX940-NEXT: ;;#ASMSTART 11166; GFX940-NEXT: ; def s[0:1] 11167; GFX940-NEXT: ;;#ASMEND 11168; GFX940-NEXT: ;;#ASMSTART 11169; GFX940-NEXT: ; def s[8:9] 11170; GFX940-NEXT: ;;#ASMEND 11171; GFX940-NEXT: s_lshr_b32 s0, s1, 16 11172; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 11173; GFX940-NEXT: ;;#ASMSTART 11174; GFX940-NEXT: ; use s[8:9] 11175; GFX940-NEXT: ;;#ASMEND 11176; GFX940-NEXT: s_setpc_b64 s[30:31] 11177 %vec0 = call <4 x half> asm "; def $0", "=s"() 11178 %vec1 = call <4 x half> asm "; def $0", "=s"() 11179 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 2> 11180 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11181 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11182 ret void 11183} 11184 11185define void @s_shuffle_v3f16_v4f16__7_1_2() { 11186; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_2: 11187; GFX900: ; %bb.0: 11188; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11189; GFX900-NEXT: ;;#ASMSTART 11190; GFX900-NEXT: ; def s[4:5] 11191; GFX900-NEXT: ;;#ASMEND 11192; GFX900-NEXT: ;;#ASMSTART 11193; GFX900-NEXT: ; def s[8:9] 11194; GFX900-NEXT: ;;#ASMEND 11195; GFX900-NEXT: s_lshr_b32 s4, s8, 16 11196; GFX900-NEXT: s_lshr_b32 s5, s5, 16 11197; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11198; GFX900-NEXT: ;;#ASMSTART 11199; GFX900-NEXT: ; use s[8:9] 11200; GFX900-NEXT: ;;#ASMEND 11201; GFX900-NEXT: s_setpc_b64 s[30:31] 11202; 11203; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_2: 11204; GFX90A: ; %bb.0: 11205; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11206; GFX90A-NEXT: ;;#ASMSTART 11207; GFX90A-NEXT: ; def s[4:5] 11208; GFX90A-NEXT: ;;#ASMEND 11209; GFX90A-NEXT: ;;#ASMSTART 11210; GFX90A-NEXT: ; def s[8:9] 11211; GFX90A-NEXT: ;;#ASMEND 11212; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 11213; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 11214; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11215; GFX90A-NEXT: ;;#ASMSTART 11216; GFX90A-NEXT: ; use s[8:9] 11217; GFX90A-NEXT: ;;#ASMEND 11218; GFX90A-NEXT: s_setpc_b64 s[30:31] 11219; 11220; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_2: 11221; GFX940: ; %bb.0: 11222; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11223; GFX940-NEXT: ;;#ASMSTART 11224; GFX940-NEXT: ; def s[0:1] 11225; GFX940-NEXT: ;;#ASMEND 11226; GFX940-NEXT: ;;#ASMSTART 11227; GFX940-NEXT: ; def s[8:9] 11228; GFX940-NEXT: ;;#ASMEND 11229; GFX940-NEXT: s_lshr_b32 s0, s8, 16 11230; GFX940-NEXT: s_lshr_b32 s1, s1, 16 11231; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11232; GFX940-NEXT: ;;#ASMSTART 11233; GFX940-NEXT: ; use s[8:9] 11234; GFX940-NEXT: ;;#ASMEND 11235; GFX940-NEXT: s_setpc_b64 s[30:31] 11236 %vec0 = call <4 x half> asm "; def $0", "=s"() 11237 %vec1 = call <4 x half> asm "; def $0", "=s"() 11238 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 2> 11239 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11240 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11241 ret void 11242} 11243 11244define void @s_shuffle_v3f16_v4f16__7_3_2() { 11245; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_2: 11246; GFX900: ; %bb.0: 11247; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11248; GFX900-NEXT: ;;#ASMSTART 11249; GFX900-NEXT: ; def s[4:5] 11250; GFX900-NEXT: ;;#ASMEND 11251; GFX900-NEXT: ;;#ASMSTART 11252; GFX900-NEXT: ; def s[8:9] 11253; GFX900-NEXT: ;;#ASMEND 11254; GFX900-NEXT: s_lshr_b32 s4, s9, 16 11255; GFX900-NEXT: s_lshr_b32 s5, s5, 16 11256; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11257; GFX900-NEXT: ;;#ASMSTART 11258; GFX900-NEXT: ; use s[8:9] 11259; GFX900-NEXT: ;;#ASMEND 11260; GFX900-NEXT: s_setpc_b64 s[30:31] 11261; 11262; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_2: 11263; GFX90A: ; %bb.0: 11264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11265; GFX90A-NEXT: ;;#ASMSTART 11266; GFX90A-NEXT: ; def s[4:5] 11267; GFX90A-NEXT: ;;#ASMEND 11268; GFX90A-NEXT: ;;#ASMSTART 11269; GFX90A-NEXT: ; def s[8:9] 11270; GFX90A-NEXT: ;;#ASMEND 11271; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 11272; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 11273; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11274; GFX90A-NEXT: ;;#ASMSTART 11275; GFX90A-NEXT: ; use s[8:9] 11276; GFX90A-NEXT: ;;#ASMEND 11277; GFX90A-NEXT: s_setpc_b64 s[30:31] 11278; 11279; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_2: 11280; GFX940: ; %bb.0: 11281; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11282; GFX940-NEXT: ;;#ASMSTART 11283; GFX940-NEXT: ; def s[0:1] 11284; GFX940-NEXT: ;;#ASMEND 11285; GFX940-NEXT: ;;#ASMSTART 11286; GFX940-NEXT: ; def s[8:9] 11287; GFX940-NEXT: ;;#ASMEND 11288; GFX940-NEXT: s_lshr_b32 s0, s9, 16 11289; GFX940-NEXT: s_lshr_b32 s1, s1, 16 11290; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11291; GFX940-NEXT: ;;#ASMSTART 11292; GFX940-NEXT: ; use s[8:9] 11293; GFX940-NEXT: ;;#ASMEND 11294; GFX940-NEXT: s_setpc_b64 s[30:31] 11295 %vec0 = call <4 x half> asm "; def $0", "=s"() 11296 %vec1 = call <4 x half> asm "; def $0", "=s"() 11297 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 2> 11298 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11299 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11300 ret void 11301} 11302 11303define void @s_shuffle_v3f16_v4f16__7_4_2() { 11304; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_2: 11305; GFX900: ; %bb.0: 11306; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11307; GFX900-NEXT: ;;#ASMSTART 11308; GFX900-NEXT: ; def s[4:5] 11309; GFX900-NEXT: ;;#ASMEND 11310; GFX900-NEXT: ;;#ASMSTART 11311; GFX900-NEXT: ; def s[8:9] 11312; GFX900-NEXT: ;;#ASMEND 11313; GFX900-NEXT: s_lshr_b32 s5, s5, 16 11314; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11315; GFX900-NEXT: ;;#ASMSTART 11316; GFX900-NEXT: ; use s[8:9] 11317; GFX900-NEXT: ;;#ASMEND 11318; GFX900-NEXT: s_setpc_b64 s[30:31] 11319; 11320; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_2: 11321; GFX90A: ; %bb.0: 11322; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11323; GFX90A-NEXT: ;;#ASMSTART 11324; GFX90A-NEXT: ; def s[4:5] 11325; GFX90A-NEXT: ;;#ASMEND 11326; GFX90A-NEXT: ;;#ASMSTART 11327; GFX90A-NEXT: ; def s[8:9] 11328; GFX90A-NEXT: ;;#ASMEND 11329; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 11330; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11331; GFX90A-NEXT: ;;#ASMSTART 11332; GFX90A-NEXT: ; use s[8:9] 11333; GFX90A-NEXT: ;;#ASMEND 11334; GFX90A-NEXT: s_setpc_b64 s[30:31] 11335; 11336; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_2: 11337; GFX940: ; %bb.0: 11338; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11339; GFX940-NEXT: ;;#ASMSTART 11340; GFX940-NEXT: ; def s[0:1] 11341; GFX940-NEXT: ;;#ASMEND 11342; GFX940-NEXT: ;;#ASMSTART 11343; GFX940-NEXT: ; def s[8:9] 11344; GFX940-NEXT: ;;#ASMEND 11345; GFX940-NEXT: s_lshr_b32 s1, s1, 16 11346; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11347; GFX940-NEXT: ;;#ASMSTART 11348; GFX940-NEXT: ; use s[8:9] 11349; GFX940-NEXT: ;;#ASMEND 11350; GFX940-NEXT: s_setpc_b64 s[30:31] 11351 %vec0 = call <4 x half> asm "; def $0", "=s"() 11352 %vec1 = call <4 x half> asm "; def $0", "=s"() 11353 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 2> 11354 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11355 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11356 ret void 11357} 11358 11359define void @s_shuffle_v3f16_v4f16__7_5_2() { 11360; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_2: 11361; GFX900: ; %bb.0: 11362; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11363; GFX900-NEXT: ;;#ASMSTART 11364; GFX900-NEXT: ; def s[4:5] 11365; GFX900-NEXT: ;;#ASMEND 11366; GFX900-NEXT: ;;#ASMSTART 11367; GFX900-NEXT: ; def s[8:9] 11368; GFX900-NEXT: ;;#ASMEND 11369; GFX900-NEXT: s_lshr_b32 s4, s4, 16 11370; GFX900-NEXT: s_lshr_b32 s5, s5, 16 11371; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11372; GFX900-NEXT: ;;#ASMSTART 11373; GFX900-NEXT: ; use s[8:9] 11374; GFX900-NEXT: ;;#ASMEND 11375; GFX900-NEXT: s_setpc_b64 s[30:31] 11376; 11377; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_2: 11378; GFX90A: ; %bb.0: 11379; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11380; GFX90A-NEXT: ;;#ASMSTART 11381; GFX90A-NEXT: ; def s[4:5] 11382; GFX90A-NEXT: ;;#ASMEND 11383; GFX90A-NEXT: ;;#ASMSTART 11384; GFX90A-NEXT: ; def s[8:9] 11385; GFX90A-NEXT: ;;#ASMEND 11386; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 11387; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 11388; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 11389; GFX90A-NEXT: ;;#ASMSTART 11390; GFX90A-NEXT: ; use s[8:9] 11391; GFX90A-NEXT: ;;#ASMEND 11392; GFX90A-NEXT: s_setpc_b64 s[30:31] 11393; 11394; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_2: 11395; GFX940: ; %bb.0: 11396; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11397; GFX940-NEXT: ;;#ASMSTART 11398; GFX940-NEXT: ; def s[0:1] 11399; GFX940-NEXT: ;;#ASMEND 11400; GFX940-NEXT: ;;#ASMSTART 11401; GFX940-NEXT: ; def s[8:9] 11402; GFX940-NEXT: ;;#ASMEND 11403; GFX940-NEXT: s_lshr_b32 s0, s0, 16 11404; GFX940-NEXT: s_lshr_b32 s1, s1, 16 11405; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 11406; GFX940-NEXT: ;;#ASMSTART 11407; GFX940-NEXT: ; use s[8:9] 11408; GFX940-NEXT: ;;#ASMEND 11409; GFX940-NEXT: s_setpc_b64 s[30:31] 11410 %vec0 = call <4 x half> asm "; def $0", "=s"() 11411 %vec1 = call <4 x half> asm "; def $0", "=s"() 11412 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 2> 11413 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11414 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11415 ret void 11416} 11417 11418define void @s_shuffle_v3f16_v4f16__7_6_2() { 11419; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_2: 11420; GFX900: ; %bb.0: 11421; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11422; GFX900-NEXT: ;;#ASMSTART 11423; GFX900-NEXT: ; def s[4:5] 11424; GFX900-NEXT: ;;#ASMEND 11425; GFX900-NEXT: ;;#ASMSTART 11426; GFX900-NEXT: ; def s[8:9] 11427; GFX900-NEXT: ;;#ASMEND 11428; GFX900-NEXT: s_lshr_b32 s4, s5, 16 11429; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 11430; GFX900-NEXT: ;;#ASMSTART 11431; GFX900-NEXT: ; use s[8:9] 11432; GFX900-NEXT: ;;#ASMEND 11433; GFX900-NEXT: s_setpc_b64 s[30:31] 11434; 11435; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_2: 11436; GFX90A: ; %bb.0: 11437; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11438; GFX90A-NEXT: ;;#ASMSTART 11439; GFX90A-NEXT: ; def s[4:5] 11440; GFX90A-NEXT: ;;#ASMEND 11441; GFX90A-NEXT: ;;#ASMSTART 11442; GFX90A-NEXT: ; def s[8:9] 11443; GFX90A-NEXT: ;;#ASMEND 11444; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 11445; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 11446; GFX90A-NEXT: ;;#ASMSTART 11447; GFX90A-NEXT: ; use s[8:9] 11448; GFX90A-NEXT: ;;#ASMEND 11449; GFX90A-NEXT: s_setpc_b64 s[30:31] 11450; 11451; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_2: 11452; GFX940: ; %bb.0: 11453; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11454; GFX940-NEXT: ;;#ASMSTART 11455; GFX940-NEXT: ; def s[0:1] 11456; GFX940-NEXT: ;;#ASMEND 11457; GFX940-NEXT: ;;#ASMSTART 11458; GFX940-NEXT: ; def s[8:9] 11459; GFX940-NEXT: ;;#ASMEND 11460; GFX940-NEXT: s_lshr_b32 s0, s1, 16 11461; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 11462; GFX940-NEXT: ;;#ASMSTART 11463; GFX940-NEXT: ; use s[8:9] 11464; GFX940-NEXT: ;;#ASMEND 11465; GFX940-NEXT: s_setpc_b64 s[30:31] 11466 %vec0 = call <4 x half> asm "; def $0", "=s"() 11467 %vec1 = call <4 x half> asm "; def $0", "=s"() 11468 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 2> 11469 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11470 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11471 ret void 11472} 11473 11474define void @s_shuffle_v3f16_v4f16__u_3_3() { 11475; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_3_3: 11476; GFX900: ; %bb.0: 11477; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11478; GFX900-NEXT: ;;#ASMSTART 11479; GFX900-NEXT: ; def s[4:5] 11480; GFX900-NEXT: ;;#ASMEND 11481; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11482; GFX900-NEXT: s_mov_b32 s8, s5 11483; GFX900-NEXT: ;;#ASMSTART 11484; GFX900-NEXT: ; use s[8:9] 11485; GFX900-NEXT: ;;#ASMEND 11486; GFX900-NEXT: s_setpc_b64 s[30:31] 11487; 11488; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_3_3: 11489; GFX90A: ; %bb.0: 11490; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11491; GFX90A-NEXT: ;;#ASMSTART 11492; GFX90A-NEXT: ; def s[4:5] 11493; GFX90A-NEXT: ;;#ASMEND 11494; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11495; GFX90A-NEXT: s_mov_b32 s8, s5 11496; GFX90A-NEXT: ;;#ASMSTART 11497; GFX90A-NEXT: ; use s[8:9] 11498; GFX90A-NEXT: ;;#ASMEND 11499; GFX90A-NEXT: s_setpc_b64 s[30:31] 11500; 11501; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_3_3: 11502; GFX940: ; %bb.0: 11503; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11504; GFX940-NEXT: ;;#ASMSTART 11505; GFX940-NEXT: ; def s[0:1] 11506; GFX940-NEXT: ;;#ASMEND 11507; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11508; GFX940-NEXT: s_mov_b32 s8, s1 11509; GFX940-NEXT: ;;#ASMSTART 11510; GFX940-NEXT: ; use s[8:9] 11511; GFX940-NEXT: ;;#ASMEND 11512; GFX940-NEXT: s_setpc_b64 s[30:31] 11513 %vec0 = call <4 x half> asm "; def $0", "=s"() 11514 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 3, i32 3> 11515 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11516 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11517 ret void 11518} 11519 11520define void @s_shuffle_v3f16_v4f16__0_3_3() { 11521; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_3_3: 11522; GFX900: ; %bb.0: 11523; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11524; GFX900-NEXT: ;;#ASMSTART 11525; GFX900-NEXT: ; def s[4:5] 11526; GFX900-NEXT: ;;#ASMEND 11527; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11528; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11529; GFX900-NEXT: ;;#ASMSTART 11530; GFX900-NEXT: ; use s[8:9] 11531; GFX900-NEXT: ;;#ASMEND 11532; GFX900-NEXT: s_setpc_b64 s[30:31] 11533; 11534; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_3_3: 11535; GFX90A: ; %bb.0: 11536; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11537; GFX90A-NEXT: ;;#ASMSTART 11538; GFX90A-NEXT: ; def s[4:5] 11539; GFX90A-NEXT: ;;#ASMEND 11540; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11541; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11542; GFX90A-NEXT: ;;#ASMSTART 11543; GFX90A-NEXT: ; use s[8:9] 11544; GFX90A-NEXT: ;;#ASMEND 11545; GFX90A-NEXT: s_setpc_b64 s[30:31] 11546; 11547; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_3_3: 11548; GFX940: ; %bb.0: 11549; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11550; GFX940-NEXT: ;;#ASMSTART 11551; GFX940-NEXT: ; def s[0:1] 11552; GFX940-NEXT: ;;#ASMEND 11553; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11554; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 11555; GFX940-NEXT: ;;#ASMSTART 11556; GFX940-NEXT: ; use s[8:9] 11557; GFX940-NEXT: ;;#ASMEND 11558; GFX940-NEXT: s_setpc_b64 s[30:31] 11559 %vec0 = call <4 x half> asm "; def $0", "=s"() 11560 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 3, i32 3> 11561 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11562 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11563 ret void 11564} 11565 11566define void @s_shuffle_v3f16_v4f16__1_3_3() { 11567; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_3_3: 11568; GFX900: ; %bb.0: 11569; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11570; GFX900-NEXT: ;;#ASMSTART 11571; GFX900-NEXT: ; def s[4:5] 11572; GFX900-NEXT: ;;#ASMEND 11573; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11574; GFX900-NEXT: s_lshr_b32 s4, s4, 16 11575; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11576; GFX900-NEXT: ;;#ASMSTART 11577; GFX900-NEXT: ; use s[8:9] 11578; GFX900-NEXT: ;;#ASMEND 11579; GFX900-NEXT: s_setpc_b64 s[30:31] 11580; 11581; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_3_3: 11582; GFX90A: ; %bb.0: 11583; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11584; GFX90A-NEXT: ;;#ASMSTART 11585; GFX90A-NEXT: ; def s[4:5] 11586; GFX90A-NEXT: ;;#ASMEND 11587; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11588; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 11589; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11590; GFX90A-NEXT: ;;#ASMSTART 11591; GFX90A-NEXT: ; use s[8:9] 11592; GFX90A-NEXT: ;;#ASMEND 11593; GFX90A-NEXT: s_setpc_b64 s[30:31] 11594; 11595; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_3_3: 11596; GFX940: ; %bb.0: 11597; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11598; GFX940-NEXT: ;;#ASMSTART 11599; GFX940-NEXT: ; def s[0:1] 11600; GFX940-NEXT: ;;#ASMEND 11601; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11602; GFX940-NEXT: s_lshr_b32 s0, s0, 16 11603; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 11604; GFX940-NEXT: ;;#ASMSTART 11605; GFX940-NEXT: ; use s[8:9] 11606; GFX940-NEXT: ;;#ASMEND 11607; GFX940-NEXT: s_setpc_b64 s[30:31] 11608 %vec0 = call <4 x half> asm "; def $0", "=s"() 11609 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 3, i32 3> 11610 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11611 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11612 ret void 11613} 11614 11615define void @s_shuffle_v3f16_v4f16__2_3_3() { 11616; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_3_3: 11617; GFX900: ; %bb.0: 11618; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11619; GFX900-NEXT: ;;#ASMSTART 11620; GFX900-NEXT: ; def s[4:5] 11621; GFX900-NEXT: ;;#ASMEND 11622; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11623; GFX900-NEXT: s_mov_b32 s8, s5 11624; GFX900-NEXT: ;;#ASMSTART 11625; GFX900-NEXT: ; use s[8:9] 11626; GFX900-NEXT: ;;#ASMEND 11627; GFX900-NEXT: s_setpc_b64 s[30:31] 11628; 11629; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_3_3: 11630; GFX90A: ; %bb.0: 11631; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11632; GFX90A-NEXT: ;;#ASMSTART 11633; GFX90A-NEXT: ; def s[4:5] 11634; GFX90A-NEXT: ;;#ASMEND 11635; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11636; GFX90A-NEXT: s_mov_b32 s8, s5 11637; GFX90A-NEXT: ;;#ASMSTART 11638; GFX90A-NEXT: ; use s[8:9] 11639; GFX90A-NEXT: ;;#ASMEND 11640; GFX90A-NEXT: s_setpc_b64 s[30:31] 11641; 11642; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_3_3: 11643; GFX940: ; %bb.0: 11644; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11645; GFX940-NEXT: ;;#ASMSTART 11646; GFX940-NEXT: ; def s[0:1] 11647; GFX940-NEXT: ;;#ASMEND 11648; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11649; GFX940-NEXT: s_mov_b32 s8, s1 11650; GFX940-NEXT: ;;#ASMSTART 11651; GFX940-NEXT: ; use s[8:9] 11652; GFX940-NEXT: ;;#ASMEND 11653; GFX940-NEXT: s_setpc_b64 s[30:31] 11654 %vec0 = call <4 x half> asm "; def $0", "=s"() 11655 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 3, i32 3> 11656 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11657 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11658 ret void 11659} 11660 11661define void @s_shuffle_v3f16_v4f16__3_3_3() { 11662; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_3_3: 11663; GFX900: ; %bb.0: 11664; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11665; GFX900-NEXT: ;;#ASMSTART 11666; GFX900-NEXT: ; def s[4:5] 11667; GFX900-NEXT: ;;#ASMEND 11668; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11669; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9 11670; GFX900-NEXT: ;;#ASMSTART 11671; GFX900-NEXT: ; use s[8:9] 11672; GFX900-NEXT: ;;#ASMEND 11673; GFX900-NEXT: s_setpc_b64 s[30:31] 11674; 11675; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_3_3: 11676; GFX90A: ; %bb.0: 11677; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11678; GFX90A-NEXT: ;;#ASMSTART 11679; GFX90A-NEXT: ; def s[4:5] 11680; GFX90A-NEXT: ;;#ASMEND 11681; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11682; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9 11683; GFX90A-NEXT: ;;#ASMSTART 11684; GFX90A-NEXT: ; use s[8:9] 11685; GFX90A-NEXT: ;;#ASMEND 11686; GFX90A-NEXT: s_setpc_b64 s[30:31] 11687; 11688; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_3_3: 11689; GFX940: ; %bb.0: 11690; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11691; GFX940-NEXT: ;;#ASMSTART 11692; GFX940-NEXT: ; def s[0:1] 11693; GFX940-NEXT: ;;#ASMEND 11694; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11695; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 11696; GFX940-NEXT: ;;#ASMSTART 11697; GFX940-NEXT: ; use s[8:9] 11698; GFX940-NEXT: ;;#ASMEND 11699; GFX940-NEXT: s_setpc_b64 s[30:31] 11700 %vec0 = call <4 x half> asm "; def $0", "=s"() 11701 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 3, i32 3> 11702 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11703 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11704 ret void 11705} 11706 11707define void @s_shuffle_v3f16_v4f16__4_3_3() { 11708; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_3_3: 11709; GFX900: ; %bb.0: 11710; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11711; GFX900-NEXT: ;;#ASMSTART 11712; GFX900-NEXT: ; def s[4:5] 11713; GFX900-NEXT: ;;#ASMEND 11714; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11715; GFX900-NEXT: s_mov_b32 s8, s5 11716; GFX900-NEXT: ;;#ASMSTART 11717; GFX900-NEXT: ; use s[8:9] 11718; GFX900-NEXT: ;;#ASMEND 11719; GFX900-NEXT: s_setpc_b64 s[30:31] 11720; 11721; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_3_3: 11722; GFX90A: ; %bb.0: 11723; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11724; GFX90A-NEXT: ;;#ASMSTART 11725; GFX90A-NEXT: ; def s[4:5] 11726; GFX90A-NEXT: ;;#ASMEND 11727; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11728; GFX90A-NEXT: s_mov_b32 s8, s5 11729; GFX90A-NEXT: ;;#ASMSTART 11730; GFX90A-NEXT: ; use s[8:9] 11731; GFX90A-NEXT: ;;#ASMEND 11732; GFX90A-NEXT: s_setpc_b64 s[30:31] 11733; 11734; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_3_3: 11735; GFX940: ; %bb.0: 11736; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11737; GFX940-NEXT: ;;#ASMSTART 11738; GFX940-NEXT: ; def s[0:1] 11739; GFX940-NEXT: ;;#ASMEND 11740; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11741; GFX940-NEXT: s_mov_b32 s8, s1 11742; GFX940-NEXT: ;;#ASMSTART 11743; GFX940-NEXT: ; use s[8:9] 11744; GFX940-NEXT: ;;#ASMEND 11745; GFX940-NEXT: s_setpc_b64 s[30:31] 11746 %vec0 = call <4 x half> asm "; def $0", "=s"() 11747 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 3, i32 3> 11748 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11749 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11750 ret void 11751} 11752 11753define void @s_shuffle_v3f16_v4f16__5_3_3() { 11754; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_3_3: 11755; GFX900: ; %bb.0: 11756; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11757; GFX900-NEXT: ;;#ASMSTART 11758; GFX900-NEXT: ; def s[4:5] 11759; GFX900-NEXT: ;;#ASMEND 11760; GFX900-NEXT: ;;#ASMSTART 11761; GFX900-NEXT: ; def s[6:7] 11762; GFX900-NEXT: ;;#ASMEND 11763; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11764; GFX900-NEXT: s_lshr_b32 s4, s6, 16 11765; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11766; GFX900-NEXT: ;;#ASMSTART 11767; GFX900-NEXT: ; use s[8:9] 11768; GFX900-NEXT: ;;#ASMEND 11769; GFX900-NEXT: s_setpc_b64 s[30:31] 11770; 11771; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_3_3: 11772; GFX90A: ; %bb.0: 11773; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11774; GFX90A-NEXT: ;;#ASMSTART 11775; GFX90A-NEXT: ; def s[4:5] 11776; GFX90A-NEXT: ;;#ASMEND 11777; GFX90A-NEXT: ;;#ASMSTART 11778; GFX90A-NEXT: ; def s[6:7] 11779; GFX90A-NEXT: ;;#ASMEND 11780; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11781; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 11782; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11783; GFX90A-NEXT: ;;#ASMSTART 11784; GFX90A-NEXT: ; use s[8:9] 11785; GFX90A-NEXT: ;;#ASMEND 11786; GFX90A-NEXT: s_setpc_b64 s[30:31] 11787; 11788; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_3_3: 11789; GFX940: ; %bb.0: 11790; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11791; GFX940-NEXT: ;;#ASMSTART 11792; GFX940-NEXT: ; def s[0:1] 11793; GFX940-NEXT: ;;#ASMEND 11794; GFX940-NEXT: ;;#ASMSTART 11795; GFX940-NEXT: ; def s[2:3] 11796; GFX940-NEXT: ;;#ASMEND 11797; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11798; GFX940-NEXT: s_lshr_b32 s0, s2, 16 11799; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 11800; GFX940-NEXT: ;;#ASMSTART 11801; GFX940-NEXT: ; use s[8:9] 11802; GFX940-NEXT: ;;#ASMEND 11803; GFX940-NEXT: s_setpc_b64 s[30:31] 11804 %vec0 = call <4 x half> asm "; def $0", "=s"() 11805 %vec1 = call <4 x half> asm "; def $0", "=s"() 11806 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 3, i32 3> 11807 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11808 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11809 ret void 11810} 11811 11812define void @s_shuffle_v3f16_v4f16__6_3_3() { 11813; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_3_3: 11814; GFX900: ; %bb.0: 11815; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11816; GFX900-NEXT: ;;#ASMSTART 11817; GFX900-NEXT: ; def s[4:5] 11818; GFX900-NEXT: ;;#ASMEND 11819; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11820; GFX900-NEXT: ;;#ASMSTART 11821; GFX900-NEXT: ; def s[6:7] 11822; GFX900-NEXT: ;;#ASMEND 11823; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s9 11824; GFX900-NEXT: ;;#ASMSTART 11825; GFX900-NEXT: ; use s[8:9] 11826; GFX900-NEXT: ;;#ASMEND 11827; GFX900-NEXT: s_setpc_b64 s[30:31] 11828; 11829; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_3_3: 11830; GFX90A: ; %bb.0: 11831; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11832; GFX90A-NEXT: ;;#ASMSTART 11833; GFX90A-NEXT: ; def s[4:5] 11834; GFX90A-NEXT: ;;#ASMEND 11835; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11836; GFX90A-NEXT: ;;#ASMSTART 11837; GFX90A-NEXT: ; def s[6:7] 11838; GFX90A-NEXT: ;;#ASMEND 11839; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s9 11840; GFX90A-NEXT: ;;#ASMSTART 11841; GFX90A-NEXT: ; use s[8:9] 11842; GFX90A-NEXT: ;;#ASMEND 11843; GFX90A-NEXT: s_setpc_b64 s[30:31] 11844; 11845; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_3_3: 11846; GFX940: ; %bb.0: 11847; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11848; GFX940-NEXT: ;;#ASMSTART 11849; GFX940-NEXT: ; def s[0:1] 11850; GFX940-NEXT: ;;#ASMEND 11851; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11852; GFX940-NEXT: ;;#ASMSTART 11853; GFX940-NEXT: ; def s[2:3] 11854; GFX940-NEXT: ;;#ASMEND 11855; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s9 11856; GFX940-NEXT: ;;#ASMSTART 11857; GFX940-NEXT: ; use s[8:9] 11858; GFX940-NEXT: ;;#ASMEND 11859; GFX940-NEXT: s_setpc_b64 s[30:31] 11860 %vec0 = call <4 x half> asm "; def $0", "=s"() 11861 %vec1 = call <4 x half> asm "; def $0", "=s"() 11862 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 3, i32 3> 11863 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11864 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11865 ret void 11866} 11867 11868define void @s_shuffle_v3f16_v4f16__7_3_3() { 11869; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_3: 11870; GFX900: ; %bb.0: 11871; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11872; GFX900-NEXT: ;;#ASMSTART 11873; GFX900-NEXT: ; def s[4:5] 11874; GFX900-NEXT: ;;#ASMEND 11875; GFX900-NEXT: ;;#ASMSTART 11876; GFX900-NEXT: ; def s[6:7] 11877; GFX900-NEXT: ;;#ASMEND 11878; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11879; GFX900-NEXT: s_lshr_b32 s4, s7, 16 11880; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11881; GFX900-NEXT: ;;#ASMSTART 11882; GFX900-NEXT: ; use s[8:9] 11883; GFX900-NEXT: ;;#ASMEND 11884; GFX900-NEXT: s_setpc_b64 s[30:31] 11885; 11886; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_3: 11887; GFX90A: ; %bb.0: 11888; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11889; GFX90A-NEXT: ;;#ASMSTART 11890; GFX90A-NEXT: ; def s[4:5] 11891; GFX90A-NEXT: ;;#ASMEND 11892; GFX90A-NEXT: ;;#ASMSTART 11893; GFX90A-NEXT: ; def s[6:7] 11894; GFX90A-NEXT: ;;#ASMEND 11895; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11896; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 11897; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 11898; GFX90A-NEXT: ;;#ASMSTART 11899; GFX90A-NEXT: ; use s[8:9] 11900; GFX90A-NEXT: ;;#ASMEND 11901; GFX90A-NEXT: s_setpc_b64 s[30:31] 11902; 11903; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_3: 11904; GFX940: ; %bb.0: 11905; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11906; GFX940-NEXT: ;;#ASMSTART 11907; GFX940-NEXT: ; def s[0:1] 11908; GFX940-NEXT: ;;#ASMEND 11909; GFX940-NEXT: ;;#ASMSTART 11910; GFX940-NEXT: ; def s[2:3] 11911; GFX940-NEXT: ;;#ASMEND 11912; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11913; GFX940-NEXT: s_lshr_b32 s0, s3, 16 11914; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 11915; GFX940-NEXT: ;;#ASMSTART 11916; GFX940-NEXT: ; use s[8:9] 11917; GFX940-NEXT: ;;#ASMEND 11918; GFX940-NEXT: s_setpc_b64 s[30:31] 11919 %vec0 = call <4 x half> asm "; def $0", "=s"() 11920 %vec1 = call <4 x half> asm "; def $0", "=s"() 11921 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 3> 11922 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11923 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11924 ret void 11925} 11926 11927define void @s_shuffle_v3f16_v4f16__7_u_3() { 11928; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_3: 11929; GFX900: ; %bb.0: 11930; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11931; GFX900-NEXT: ;;#ASMSTART 11932; GFX900-NEXT: ; def s[4:5] 11933; GFX900-NEXT: ;;#ASMEND 11934; GFX900-NEXT: ;;#ASMSTART 11935; GFX900-NEXT: ; def s[6:7] 11936; GFX900-NEXT: ;;#ASMEND 11937; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11938; GFX900-NEXT: s_lshr_b32 s8, s7, 16 11939; GFX900-NEXT: ;;#ASMSTART 11940; GFX900-NEXT: ; use s[8:9] 11941; GFX900-NEXT: ;;#ASMEND 11942; GFX900-NEXT: s_setpc_b64 s[30:31] 11943; 11944; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_3: 11945; GFX90A: ; %bb.0: 11946; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11947; GFX90A-NEXT: ;;#ASMSTART 11948; GFX90A-NEXT: ; def s[4:5] 11949; GFX90A-NEXT: ;;#ASMEND 11950; GFX90A-NEXT: ;;#ASMSTART 11951; GFX90A-NEXT: ; def s[6:7] 11952; GFX90A-NEXT: ;;#ASMEND 11953; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 11954; GFX90A-NEXT: s_lshr_b32 s8, s7, 16 11955; GFX90A-NEXT: ;;#ASMSTART 11956; GFX90A-NEXT: ; use s[8:9] 11957; GFX90A-NEXT: ;;#ASMEND 11958; GFX90A-NEXT: s_setpc_b64 s[30:31] 11959; 11960; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_3: 11961; GFX940: ; %bb.0: 11962; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11963; GFX940-NEXT: ;;#ASMSTART 11964; GFX940-NEXT: ; def s[0:1] 11965; GFX940-NEXT: ;;#ASMEND 11966; GFX940-NEXT: ;;#ASMSTART 11967; GFX940-NEXT: ; def s[2:3] 11968; GFX940-NEXT: ;;#ASMEND 11969; GFX940-NEXT: s_lshr_b32 s9, s1, 16 11970; GFX940-NEXT: s_lshr_b32 s8, s3, 16 11971; GFX940-NEXT: ;;#ASMSTART 11972; GFX940-NEXT: ; use s[8:9] 11973; GFX940-NEXT: ;;#ASMEND 11974; GFX940-NEXT: s_setpc_b64 s[30:31] 11975 %vec0 = call <4 x half> asm "; def $0", "=s"() 11976 %vec1 = call <4 x half> asm "; def $0", "=s"() 11977 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 3> 11978 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 11979 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 11980 ret void 11981} 11982 11983define void @s_shuffle_v3f16_v4f16__7_0_3() { 11984; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_3: 11985; GFX900: ; %bb.0: 11986; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11987; GFX900-NEXT: ;;#ASMSTART 11988; GFX900-NEXT: ; def s[6:7] 11989; GFX900-NEXT: ;;#ASMEND 11990; GFX900-NEXT: s_lshr_b32 s6, s7, 16 11991; GFX900-NEXT: ;;#ASMSTART 11992; GFX900-NEXT: ; def s[4:5] 11993; GFX900-NEXT: ;;#ASMEND 11994; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 11995; GFX900-NEXT: s_lshr_b32 s9, s5, 16 11996; GFX900-NEXT: ;;#ASMSTART 11997; GFX900-NEXT: ; use s[8:9] 11998; GFX900-NEXT: ;;#ASMEND 11999; GFX900-NEXT: s_setpc_b64 s[30:31] 12000; 12001; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_3: 12002; GFX90A: ; %bb.0: 12003; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12004; GFX90A-NEXT: ;;#ASMSTART 12005; GFX90A-NEXT: ; def s[6:7] 12006; GFX90A-NEXT: ;;#ASMEND 12007; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 12008; GFX90A-NEXT: ;;#ASMSTART 12009; GFX90A-NEXT: ; def s[4:5] 12010; GFX90A-NEXT: ;;#ASMEND 12011; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 12012; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12013; GFX90A-NEXT: ;;#ASMSTART 12014; GFX90A-NEXT: ; use s[8:9] 12015; GFX90A-NEXT: ;;#ASMEND 12016; GFX90A-NEXT: s_setpc_b64 s[30:31] 12017; 12018; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_3: 12019; GFX940: ; %bb.0: 12020; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12021; GFX940-NEXT: ;;#ASMSTART 12022; GFX940-NEXT: ; def s[2:3] 12023; GFX940-NEXT: ;;#ASMEND 12024; GFX940-NEXT: s_lshr_b32 s2, s3, 16 12025; GFX940-NEXT: ;;#ASMSTART 12026; GFX940-NEXT: ; def s[0:1] 12027; GFX940-NEXT: ;;#ASMEND 12028; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 12029; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12030; GFX940-NEXT: ;;#ASMSTART 12031; GFX940-NEXT: ; use s[8:9] 12032; GFX940-NEXT: ;;#ASMEND 12033; GFX940-NEXT: s_setpc_b64 s[30:31] 12034 %vec0 = call <4 x half> asm "; def $0", "=s"() 12035 %vec1 = call <4 x half> asm "; def $0", "=s"() 12036 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 3> 12037 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12038 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12039 ret void 12040} 12041 12042define void @s_shuffle_v3f16_v4f16__7_1_3() { 12043; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_3: 12044; GFX900: ; %bb.0: 12045; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12046; GFX900-NEXT: ;;#ASMSTART 12047; GFX900-NEXT: ; def s[4:5] 12048; GFX900-NEXT: ;;#ASMEND 12049; GFX900-NEXT: ;;#ASMSTART 12050; GFX900-NEXT: ; def s[6:7] 12051; GFX900-NEXT: ;;#ASMEND 12052; GFX900-NEXT: s_lshr_b32 s4, s4, 16 12053; GFX900-NEXT: s_lshr_b32 s6, s7, 16 12054; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 12055; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12056; GFX900-NEXT: ;;#ASMSTART 12057; GFX900-NEXT: ; use s[8:9] 12058; GFX900-NEXT: ;;#ASMEND 12059; GFX900-NEXT: s_setpc_b64 s[30:31] 12060; 12061; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_3: 12062; GFX90A: ; %bb.0: 12063; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12064; GFX90A-NEXT: ;;#ASMSTART 12065; GFX90A-NEXT: ; def s[4:5] 12066; GFX90A-NEXT: ;;#ASMEND 12067; GFX90A-NEXT: ;;#ASMSTART 12068; GFX90A-NEXT: ; def s[6:7] 12069; GFX90A-NEXT: ;;#ASMEND 12070; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 12071; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 12072; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 12073; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12074; GFX90A-NEXT: ;;#ASMSTART 12075; GFX90A-NEXT: ; use s[8:9] 12076; GFX90A-NEXT: ;;#ASMEND 12077; GFX90A-NEXT: s_setpc_b64 s[30:31] 12078; 12079; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_3: 12080; GFX940: ; %bb.0: 12081; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12082; GFX940-NEXT: ;;#ASMSTART 12083; GFX940-NEXT: ; def s[0:1] 12084; GFX940-NEXT: ;;#ASMEND 12085; GFX940-NEXT: ;;#ASMSTART 12086; GFX940-NEXT: ; def s[2:3] 12087; GFX940-NEXT: ;;#ASMEND 12088; GFX940-NEXT: s_lshr_b32 s0, s0, 16 12089; GFX940-NEXT: s_lshr_b32 s2, s3, 16 12090; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 12091; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12092; GFX940-NEXT: ;;#ASMSTART 12093; GFX940-NEXT: ; use s[8:9] 12094; GFX940-NEXT: ;;#ASMEND 12095; GFX940-NEXT: s_setpc_b64 s[30:31] 12096 %vec0 = call <4 x half> asm "; def $0", "=s"() 12097 %vec1 = call <4 x half> asm "; def $0", "=s"() 12098 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 3> 12099 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12100 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12101 ret void 12102} 12103 12104define void @s_shuffle_v3f16_v4f16__7_2_3() { 12105; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_3: 12106; GFX900: ; %bb.0: 12107; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12108; GFX900-NEXT: ;;#ASMSTART 12109; GFX900-NEXT: ; def s[4:5] 12110; GFX900-NEXT: ;;#ASMEND 12111; GFX900-NEXT: ;;#ASMSTART 12112; GFX900-NEXT: ; def s[6:7] 12113; GFX900-NEXT: ;;#ASMEND 12114; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12115; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12116; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12117; GFX900-NEXT: ;;#ASMSTART 12118; GFX900-NEXT: ; use s[8:9] 12119; GFX900-NEXT: ;;#ASMEND 12120; GFX900-NEXT: s_setpc_b64 s[30:31] 12121; 12122; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_3: 12123; GFX90A: ; %bb.0: 12124; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12125; GFX90A-NEXT: ;;#ASMSTART 12126; GFX90A-NEXT: ; def s[4:5] 12127; GFX90A-NEXT: ;;#ASMEND 12128; GFX90A-NEXT: ;;#ASMSTART 12129; GFX90A-NEXT: ; def s[6:7] 12130; GFX90A-NEXT: ;;#ASMEND 12131; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12132; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12133; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12134; GFX90A-NEXT: ;;#ASMSTART 12135; GFX90A-NEXT: ; use s[8:9] 12136; GFX90A-NEXT: ;;#ASMEND 12137; GFX90A-NEXT: s_setpc_b64 s[30:31] 12138; 12139; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_3: 12140; GFX940: ; %bb.0: 12141; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12142; GFX940-NEXT: ;;#ASMSTART 12143; GFX940-NEXT: ; def s[0:1] 12144; GFX940-NEXT: ;;#ASMEND 12145; GFX940-NEXT: ;;#ASMSTART 12146; GFX940-NEXT: ; def s[2:3] 12147; GFX940-NEXT: ;;#ASMEND 12148; GFX940-NEXT: s_lshr_b32 s0, s3, 16 12149; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 12150; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12151; GFX940-NEXT: ;;#ASMSTART 12152; GFX940-NEXT: ; use s[8:9] 12153; GFX940-NEXT: ;;#ASMEND 12154; GFX940-NEXT: s_setpc_b64 s[30:31] 12155 %vec0 = call <4 x half> asm "; def $0", "=s"() 12156 %vec1 = call <4 x half> asm "; def $0", "=s"() 12157 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 3> 12158 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12159 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12160 ret void 12161} 12162 12163define void @s_shuffle_v3f16_v4f16__7_4_3() { 12164; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_3: 12165; GFX900: ; %bb.0: 12166; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12167; GFX900-NEXT: ;;#ASMSTART 12168; GFX900-NEXT: ; def s[4:5] 12169; GFX900-NEXT: ;;#ASMEND 12170; GFX900-NEXT: ;;#ASMSTART 12171; GFX900-NEXT: ; def s[6:7] 12172; GFX900-NEXT: ;;#ASMEND 12173; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12174; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s6 12175; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12176; GFX900-NEXT: ;;#ASMSTART 12177; GFX900-NEXT: ; use s[8:9] 12178; GFX900-NEXT: ;;#ASMEND 12179; GFX900-NEXT: s_setpc_b64 s[30:31] 12180; 12181; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_3: 12182; GFX90A: ; %bb.0: 12183; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12184; GFX90A-NEXT: ;;#ASMSTART 12185; GFX90A-NEXT: ; def s[4:5] 12186; GFX90A-NEXT: ;;#ASMEND 12187; GFX90A-NEXT: ;;#ASMSTART 12188; GFX90A-NEXT: ; def s[6:7] 12189; GFX90A-NEXT: ;;#ASMEND 12190; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12191; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s6 12192; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12193; GFX90A-NEXT: ;;#ASMSTART 12194; GFX90A-NEXT: ; use s[8:9] 12195; GFX90A-NEXT: ;;#ASMEND 12196; GFX90A-NEXT: s_setpc_b64 s[30:31] 12197; 12198; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_3: 12199; GFX940: ; %bb.0: 12200; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12201; GFX940-NEXT: ;;#ASMSTART 12202; GFX940-NEXT: ; def s[0:1] 12203; GFX940-NEXT: ;;#ASMEND 12204; GFX940-NEXT: ;;#ASMSTART 12205; GFX940-NEXT: ; def s[2:3] 12206; GFX940-NEXT: ;;#ASMEND 12207; GFX940-NEXT: s_lshr_b32 s0, s3, 16 12208; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s2 12209; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12210; GFX940-NEXT: ;;#ASMSTART 12211; GFX940-NEXT: ; use s[8:9] 12212; GFX940-NEXT: ;;#ASMEND 12213; GFX940-NEXT: s_setpc_b64 s[30:31] 12214 %vec0 = call <4 x half> asm "; def $0", "=s"() 12215 %vec1 = call <4 x half> asm "; def $0", "=s"() 12216 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 3> 12217 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12218 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12219 ret void 12220} 12221 12222define void @s_shuffle_v3f16_v4f16__7_5_3() { 12223; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_3: 12224; GFX900: ; %bb.0: 12225; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12226; GFX900-NEXT: ;;#ASMSTART 12227; GFX900-NEXT: ; def s[4:5] 12228; GFX900-NEXT: ;;#ASMEND 12229; GFX900-NEXT: ;;#ASMSTART 12230; GFX900-NEXT: ; def s[6:7] 12231; GFX900-NEXT: ;;#ASMEND 12232; GFX900-NEXT: s_lshr_b32 s4, s6, 16 12233; GFX900-NEXT: s_lshr_b32 s6, s7, 16 12234; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s4 12235; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12236; GFX900-NEXT: ;;#ASMSTART 12237; GFX900-NEXT: ; use s[8:9] 12238; GFX900-NEXT: ;;#ASMEND 12239; GFX900-NEXT: s_setpc_b64 s[30:31] 12240; 12241; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_3: 12242; GFX90A: ; %bb.0: 12243; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12244; GFX90A-NEXT: ;;#ASMSTART 12245; GFX90A-NEXT: ; def s[4:5] 12246; GFX90A-NEXT: ;;#ASMEND 12247; GFX90A-NEXT: ;;#ASMSTART 12248; GFX90A-NEXT: ; def s[6:7] 12249; GFX90A-NEXT: ;;#ASMEND 12250; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 12251; GFX90A-NEXT: s_lshr_b32 s6, s7, 16 12252; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s4 12253; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12254; GFX90A-NEXT: ;;#ASMSTART 12255; GFX90A-NEXT: ; use s[8:9] 12256; GFX90A-NEXT: ;;#ASMEND 12257; GFX90A-NEXT: s_setpc_b64 s[30:31] 12258; 12259; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_3: 12260; GFX940: ; %bb.0: 12261; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12262; GFX940-NEXT: ;;#ASMSTART 12263; GFX940-NEXT: ; def s[0:1] 12264; GFX940-NEXT: ;;#ASMEND 12265; GFX940-NEXT: ;;#ASMSTART 12266; GFX940-NEXT: ; def s[2:3] 12267; GFX940-NEXT: ;;#ASMEND 12268; GFX940-NEXT: s_lshr_b32 s0, s2, 16 12269; GFX940-NEXT: s_lshr_b32 s2, s3, 16 12270; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s0 12271; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12272; GFX940-NEXT: ;;#ASMSTART 12273; GFX940-NEXT: ; use s[8:9] 12274; GFX940-NEXT: ;;#ASMEND 12275; GFX940-NEXT: s_setpc_b64 s[30:31] 12276 %vec0 = call <4 x half> asm "; def $0", "=s"() 12277 %vec1 = call <4 x half> asm "; def $0", "=s"() 12278 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 3> 12279 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12280 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12281 ret void 12282} 12283 12284define void @s_shuffle_v3f16_v4f16__7_6_3() { 12285; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_3: 12286; GFX900: ; %bb.0: 12287; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12288; GFX900-NEXT: ;;#ASMSTART 12289; GFX900-NEXT: ; def s[4:5] 12290; GFX900-NEXT: ;;#ASMEND 12291; GFX900-NEXT: ;;#ASMSTART 12292; GFX900-NEXT: ; def s[6:7] 12293; GFX900-NEXT: ;;#ASMEND 12294; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12295; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 12296; GFX900-NEXT: s_lshr_b32 s9, s5, 16 12297; GFX900-NEXT: ;;#ASMSTART 12298; GFX900-NEXT: ; use s[8:9] 12299; GFX900-NEXT: ;;#ASMEND 12300; GFX900-NEXT: s_setpc_b64 s[30:31] 12301; 12302; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_3: 12303; GFX90A: ; %bb.0: 12304; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12305; GFX90A-NEXT: ;;#ASMSTART 12306; GFX90A-NEXT: ; def s[4:5] 12307; GFX90A-NEXT: ;;#ASMEND 12308; GFX90A-NEXT: ;;#ASMSTART 12309; GFX90A-NEXT: ; def s[6:7] 12310; GFX90A-NEXT: ;;#ASMEND 12311; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12312; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 12313; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 12314; GFX90A-NEXT: ;;#ASMSTART 12315; GFX90A-NEXT: ; use s[8:9] 12316; GFX90A-NEXT: ;;#ASMEND 12317; GFX90A-NEXT: s_setpc_b64 s[30:31] 12318; 12319; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_3: 12320; GFX940: ; %bb.0: 12321; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12322; GFX940-NEXT: ;;#ASMSTART 12323; GFX940-NEXT: ; def s[0:1] 12324; GFX940-NEXT: ;;#ASMEND 12325; GFX940-NEXT: ;;#ASMSTART 12326; GFX940-NEXT: ; def s[2:3] 12327; GFX940-NEXT: ;;#ASMEND 12328; GFX940-NEXT: s_lshr_b32 s0, s3, 16 12329; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 12330; GFX940-NEXT: s_lshr_b32 s9, s1, 16 12331; GFX940-NEXT: ;;#ASMSTART 12332; GFX940-NEXT: ; use s[8:9] 12333; GFX940-NEXT: ;;#ASMEND 12334; GFX940-NEXT: s_setpc_b64 s[30:31] 12335 %vec0 = call <4 x half> asm "; def $0", "=s"() 12336 %vec1 = call <4 x half> asm "; def $0", "=s"() 12337 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 3> 12338 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12339 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12340 ret void 12341} 12342 12343define void @s_shuffle_v3f16_v4f16__u_4_4() { 12344; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_4_4: 12345; GFX9: ; %bb.0: 12346; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12347; GFX9-NEXT: ;;#ASMSTART 12348; GFX9-NEXT: ; use s[8:9] 12349; GFX9-NEXT: ;;#ASMEND 12350; GFX9-NEXT: s_setpc_b64 s[30:31] 12351 %vec0 = call <4 x half> asm "; def $0", "=s"() 12352 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 poison, i32 4, i32 4> 12353 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12354 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12355 ret void 12356} 12357 12358define void @s_shuffle_v3f16_v4f16__0_4_4() { 12359; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_4_4: 12360; GFX900: ; %bb.0: 12361; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12362; GFX900-NEXT: ;;#ASMSTART 12363; GFX900-NEXT: ; def s[8:9] 12364; GFX900-NEXT: ;;#ASMEND 12365; GFX900-NEXT: ;;#ASMSTART 12366; GFX900-NEXT: ; use s[8:9] 12367; GFX900-NEXT: ;;#ASMEND 12368; GFX900-NEXT: s_setpc_b64 s[30:31] 12369; 12370; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_4_4: 12371; GFX90A: ; %bb.0: 12372; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12373; GFX90A-NEXT: ;;#ASMSTART 12374; GFX90A-NEXT: ; def s[8:9] 12375; GFX90A-NEXT: ;;#ASMEND 12376; GFX90A-NEXT: ;;#ASMSTART 12377; GFX90A-NEXT: ; use s[8:9] 12378; GFX90A-NEXT: ;;#ASMEND 12379; GFX90A-NEXT: s_setpc_b64 s[30:31] 12380; 12381; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_4_4: 12382; GFX940: ; %bb.0: 12383; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12384; GFX940-NEXT: ;;#ASMSTART 12385; GFX940-NEXT: ; def s[8:9] 12386; GFX940-NEXT: ;;#ASMEND 12387; GFX940-NEXT: s_nop 0 12388; GFX940-NEXT: ;;#ASMSTART 12389; GFX940-NEXT: ; use s[8:9] 12390; GFX940-NEXT: ;;#ASMEND 12391; GFX940-NEXT: s_setpc_b64 s[30:31] 12392 %vec0 = call <4 x half> asm "; def $0", "=s"() 12393 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 4, i32 4> 12394 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12396 ret void 12397} 12398 12399define void @s_shuffle_v3f16_v4f16__1_4_4() { 12400; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_4_4: 12401; GFX900: ; %bb.0: 12402; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12403; GFX900-NEXT: ;;#ASMSTART 12404; GFX900-NEXT: ; def s[4:5] 12405; GFX900-NEXT: ;;#ASMEND 12406; GFX900-NEXT: s_lshr_b32 s8, s4, 16 12407; GFX900-NEXT: ;;#ASMSTART 12408; GFX900-NEXT: ; use s[8:9] 12409; GFX900-NEXT: ;;#ASMEND 12410; GFX900-NEXT: s_setpc_b64 s[30:31] 12411; 12412; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_4_4: 12413; GFX90A: ; %bb.0: 12414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12415; GFX90A-NEXT: ;;#ASMSTART 12416; GFX90A-NEXT: ; def s[4:5] 12417; GFX90A-NEXT: ;;#ASMEND 12418; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 12419; GFX90A-NEXT: ;;#ASMSTART 12420; GFX90A-NEXT: ; use s[8:9] 12421; GFX90A-NEXT: ;;#ASMEND 12422; GFX90A-NEXT: s_setpc_b64 s[30:31] 12423; 12424; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_4_4: 12425; GFX940: ; %bb.0: 12426; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12427; GFX940-NEXT: ;;#ASMSTART 12428; GFX940-NEXT: ; def s[0:1] 12429; GFX940-NEXT: ;;#ASMEND 12430; GFX940-NEXT: s_lshr_b32 s8, s0, 16 12431; GFX940-NEXT: ;;#ASMSTART 12432; GFX940-NEXT: ; use s[8:9] 12433; GFX940-NEXT: ;;#ASMEND 12434; GFX940-NEXT: s_setpc_b64 s[30:31] 12435 %vec0 = call <4 x half> asm "; def $0", "=s"() 12436 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 1, i32 4, i32 4> 12437 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12438 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12439 ret void 12440} 12441 12442define void @s_shuffle_v3f16_v4f16__2_4_4() { 12443; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_4_4: 12444; GFX900: ; %bb.0: 12445; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12446; GFX900-NEXT: ;;#ASMSTART 12447; GFX900-NEXT: ; def s[4:5] 12448; GFX900-NEXT: ;;#ASMEND 12449; GFX900-NEXT: s_mov_b32 s8, s5 12450; GFX900-NEXT: ;;#ASMSTART 12451; GFX900-NEXT: ; use s[8:9] 12452; GFX900-NEXT: ;;#ASMEND 12453; GFX900-NEXT: s_setpc_b64 s[30:31] 12454; 12455; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_4_4: 12456; GFX90A: ; %bb.0: 12457; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12458; GFX90A-NEXT: ;;#ASMSTART 12459; GFX90A-NEXT: ; def s[4:5] 12460; GFX90A-NEXT: ;;#ASMEND 12461; GFX90A-NEXT: s_mov_b32 s8, s5 12462; GFX90A-NEXT: ;;#ASMSTART 12463; GFX90A-NEXT: ; use s[8:9] 12464; GFX90A-NEXT: ;;#ASMEND 12465; GFX90A-NEXT: s_setpc_b64 s[30:31] 12466; 12467; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_4_4: 12468; GFX940: ; %bb.0: 12469; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12470; GFX940-NEXT: ;;#ASMSTART 12471; GFX940-NEXT: ; def s[0:1] 12472; GFX940-NEXT: ;;#ASMEND 12473; GFX940-NEXT: s_mov_b32 s8, s1 12474; GFX940-NEXT: ;;#ASMSTART 12475; GFX940-NEXT: ; use s[8:9] 12476; GFX940-NEXT: ;;#ASMEND 12477; GFX940-NEXT: s_setpc_b64 s[30:31] 12478 %vec0 = call <4 x half> asm "; def $0", "=s"() 12479 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 2, i32 4, i32 4> 12480 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12481 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12482 ret void 12483} 12484 12485define void @s_shuffle_v3f16_v4f16__3_4_4() { 12486; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_4_4: 12487; GFX900: ; %bb.0: 12488; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12489; GFX900-NEXT: ;;#ASMSTART 12490; GFX900-NEXT: ; def s[4:5] 12491; GFX900-NEXT: ;;#ASMEND 12492; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12493; GFX900-NEXT: ;;#ASMSTART 12494; GFX900-NEXT: ; use s[8:9] 12495; GFX900-NEXT: ;;#ASMEND 12496; GFX900-NEXT: s_setpc_b64 s[30:31] 12497; 12498; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_4_4: 12499; GFX90A: ; %bb.0: 12500; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12501; GFX90A-NEXT: ;;#ASMSTART 12502; GFX90A-NEXT: ; def s[4:5] 12503; GFX90A-NEXT: ;;#ASMEND 12504; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12505; GFX90A-NEXT: ;;#ASMSTART 12506; GFX90A-NEXT: ; use s[8:9] 12507; GFX90A-NEXT: ;;#ASMEND 12508; GFX90A-NEXT: s_setpc_b64 s[30:31] 12509; 12510; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_4_4: 12511; GFX940: ; %bb.0: 12512; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12513; GFX940-NEXT: ;;#ASMSTART 12514; GFX940-NEXT: ; def s[0:1] 12515; GFX940-NEXT: ;;#ASMEND 12516; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12517; GFX940-NEXT: ;;#ASMSTART 12518; GFX940-NEXT: ; use s[8:9] 12519; GFX940-NEXT: ;;#ASMEND 12520; GFX940-NEXT: s_setpc_b64 s[30:31] 12521 %vec0 = call <4 x half> asm "; def $0", "=s"() 12522 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 3, i32 4, i32 4> 12523 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12524 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12525 ret void 12526} 12527 12528define void @s_shuffle_v3f16_v4f16__4_4_4() { 12529; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_4_4: 12530; GFX9: ; %bb.0: 12531; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12532; GFX9-NEXT: ;;#ASMSTART 12533; GFX9-NEXT: ; use s[8:9] 12534; GFX9-NEXT: ;;#ASMEND 12535; GFX9-NEXT: s_setpc_b64 s[30:31] 12536 %vec0 = call <4 x half> asm "; def $0", "=s"() 12537 %shuf = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 4, i32 4, i32 4> 12538 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12539 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12540 ret void 12541} 12542 12543define void @s_shuffle_v3f16_v4f16__5_4_4() { 12544; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_4_4: 12545; GFX900: ; %bb.0: 12546; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12547; GFX900-NEXT: ;;#ASMSTART 12548; GFX900-NEXT: ; def s[4:5] 12549; GFX900-NEXT: ;;#ASMEND 12550; GFX900-NEXT: s_lshr_b32 s5, s4, 16 12551; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12552; GFX900-NEXT: s_mov_b32 s9, s4 12553; GFX900-NEXT: ;;#ASMSTART 12554; GFX900-NEXT: ; use s[8:9] 12555; GFX900-NEXT: ;;#ASMEND 12556; GFX900-NEXT: s_setpc_b64 s[30:31] 12557; 12558; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_4_4: 12559; GFX90A: ; %bb.0: 12560; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12561; GFX90A-NEXT: ;;#ASMSTART 12562; GFX90A-NEXT: ; def s[4:5] 12563; GFX90A-NEXT: ;;#ASMEND 12564; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 12565; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12566; GFX90A-NEXT: s_mov_b32 s9, s4 12567; GFX90A-NEXT: ;;#ASMSTART 12568; GFX90A-NEXT: ; use s[8:9] 12569; GFX90A-NEXT: ;;#ASMEND 12570; GFX90A-NEXT: s_setpc_b64 s[30:31] 12571; 12572; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_4_4: 12573; GFX940: ; %bb.0: 12574; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12575; GFX940-NEXT: ;;#ASMSTART 12576; GFX940-NEXT: ; def s[0:1] 12577; GFX940-NEXT: ;;#ASMEND 12578; GFX940-NEXT: s_lshr_b32 s1, s0, 16 12579; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12580; GFX940-NEXT: s_mov_b32 s9, s0 12581; GFX940-NEXT: ;;#ASMSTART 12582; GFX940-NEXT: ; use s[8:9] 12583; GFX940-NEXT: ;;#ASMEND 12584; GFX940-NEXT: s_setpc_b64 s[30:31] 12585 %vec0 = call <4 x half> asm "; def $0", "=s"() 12586 %vec1 = call <4 x half> asm "; def $0", "=s"() 12587 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 4, i32 4> 12588 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12589 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12590 ret void 12591} 12592 12593define void @s_shuffle_v3f16_v4f16__6_4_4() { 12594; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_4_4: 12595; GFX900: ; %bb.0: 12596; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12597; GFX900-NEXT: ;;#ASMSTART 12598; GFX900-NEXT: ; def s[4:5] 12599; GFX900-NEXT: ;;#ASMEND 12600; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12601; GFX900-NEXT: s_mov_b32 s9, s4 12602; GFX900-NEXT: ;;#ASMSTART 12603; GFX900-NEXT: ; use s[8:9] 12604; GFX900-NEXT: ;;#ASMEND 12605; GFX900-NEXT: s_setpc_b64 s[30:31] 12606; 12607; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_4_4: 12608; GFX90A: ; %bb.0: 12609; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12610; GFX90A-NEXT: ;;#ASMSTART 12611; GFX90A-NEXT: ; def s[4:5] 12612; GFX90A-NEXT: ;;#ASMEND 12613; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12614; GFX90A-NEXT: s_mov_b32 s9, s4 12615; GFX90A-NEXT: ;;#ASMSTART 12616; GFX90A-NEXT: ; use s[8:9] 12617; GFX90A-NEXT: ;;#ASMEND 12618; GFX90A-NEXT: s_setpc_b64 s[30:31] 12619; 12620; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_4_4: 12621; GFX940: ; %bb.0: 12622; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12623; GFX940-NEXT: ;;#ASMSTART 12624; GFX940-NEXT: ; def s[0:1] 12625; GFX940-NEXT: ;;#ASMEND 12626; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12627; GFX940-NEXT: s_mov_b32 s9, s0 12628; GFX940-NEXT: ;;#ASMSTART 12629; GFX940-NEXT: ; use s[8:9] 12630; GFX940-NEXT: ;;#ASMEND 12631; GFX940-NEXT: s_setpc_b64 s[30:31] 12632 %vec0 = call <4 x half> asm "; def $0", "=s"() 12633 %vec1 = call <4 x half> asm "; def $0", "=s"() 12634 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 4, i32 4> 12635 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12636 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12637 ret void 12638} 12639 12640define void @s_shuffle_v3f16_v4f16__7_4_4() { 12641; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_4: 12642; GFX900: ; %bb.0: 12643; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12644; GFX900-NEXT: ;;#ASMSTART 12645; GFX900-NEXT: ; def s[4:5] 12646; GFX900-NEXT: ;;#ASMEND 12647; GFX900-NEXT: s_lshr_b32 s5, s5, 16 12648; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12649; GFX900-NEXT: s_mov_b32 s9, s4 12650; GFX900-NEXT: ;;#ASMSTART 12651; GFX900-NEXT: ; use s[8:9] 12652; GFX900-NEXT: ;;#ASMEND 12653; GFX900-NEXT: s_setpc_b64 s[30:31] 12654; 12655; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_4: 12656; GFX90A: ; %bb.0: 12657; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12658; GFX90A-NEXT: ;;#ASMSTART 12659; GFX90A-NEXT: ; def s[4:5] 12660; GFX90A-NEXT: ;;#ASMEND 12661; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 12662; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12663; GFX90A-NEXT: s_mov_b32 s9, s4 12664; GFX90A-NEXT: ;;#ASMSTART 12665; GFX90A-NEXT: ; use s[8:9] 12666; GFX90A-NEXT: ;;#ASMEND 12667; GFX90A-NEXT: s_setpc_b64 s[30:31] 12668; 12669; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_4: 12670; GFX940: ; %bb.0: 12671; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12672; GFX940-NEXT: ;;#ASMSTART 12673; GFX940-NEXT: ; def s[0:1] 12674; GFX940-NEXT: ;;#ASMEND 12675; GFX940-NEXT: s_lshr_b32 s1, s1, 16 12676; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12677; GFX940-NEXT: s_mov_b32 s9, s0 12678; GFX940-NEXT: ;;#ASMSTART 12679; GFX940-NEXT: ; use s[8:9] 12680; GFX940-NEXT: ;;#ASMEND 12681; GFX940-NEXT: s_setpc_b64 s[30:31] 12682 %vec0 = call <4 x half> asm "; def $0", "=s"() 12683 %vec1 = call <4 x half> asm "; def $0", "=s"() 12684 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 4> 12685 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12686 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12687 ret void 12688} 12689 12690define void @s_shuffle_v3f16_v4f16__7_u_4() { 12691; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_4: 12692; GFX900: ; %bb.0: 12693; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12694; GFX900-NEXT: ;;#ASMSTART 12695; GFX900-NEXT: ; def s[4:5] 12696; GFX900-NEXT: ;;#ASMEND 12697; GFX900-NEXT: s_lshr_b32 s8, s5, 16 12698; GFX900-NEXT: s_mov_b32 s9, s4 12699; GFX900-NEXT: ;;#ASMSTART 12700; GFX900-NEXT: ; use s[8:9] 12701; GFX900-NEXT: ;;#ASMEND 12702; GFX900-NEXT: s_setpc_b64 s[30:31] 12703; 12704; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_4: 12705; GFX90A: ; %bb.0: 12706; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12707; GFX90A-NEXT: ;;#ASMSTART 12708; GFX90A-NEXT: ; def s[4:5] 12709; GFX90A-NEXT: ;;#ASMEND 12710; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 12711; GFX90A-NEXT: s_mov_b32 s9, s4 12712; GFX90A-NEXT: ;;#ASMSTART 12713; GFX90A-NEXT: ; use s[8:9] 12714; GFX90A-NEXT: ;;#ASMEND 12715; GFX90A-NEXT: s_setpc_b64 s[30:31] 12716; 12717; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_4: 12718; GFX940: ; %bb.0: 12719; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12720; GFX940-NEXT: ;;#ASMSTART 12721; GFX940-NEXT: ; def s[0:1] 12722; GFX940-NEXT: ;;#ASMEND 12723; GFX940-NEXT: s_lshr_b32 s8, s1, 16 12724; GFX940-NEXT: s_mov_b32 s9, s0 12725; GFX940-NEXT: ;;#ASMSTART 12726; GFX940-NEXT: ; use s[8:9] 12727; GFX940-NEXT: ;;#ASMEND 12728; GFX940-NEXT: s_setpc_b64 s[30:31] 12729 %vec0 = call <4 x half> asm "; def $0", "=s"() 12730 %vec1 = call <4 x half> asm "; def $0", "=s"() 12731 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 4> 12732 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12733 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12734 ret void 12735} 12736 12737define void @s_shuffle_v3f16_v4f16__7_0_4() { 12738; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_4: 12739; GFX900: ; %bb.0: 12740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12741; GFX900-NEXT: ;;#ASMSTART 12742; GFX900-NEXT: ; def s[4:5] 12743; GFX900-NEXT: ;;#ASMEND 12744; GFX900-NEXT: ;;#ASMSTART 12745; GFX900-NEXT: ; def s[6:7] 12746; GFX900-NEXT: ;;#ASMEND 12747; GFX900-NEXT: s_lshr_b32 s5, s7, 16 12748; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12749; GFX900-NEXT: s_mov_b32 s9, s6 12750; GFX900-NEXT: ;;#ASMSTART 12751; GFX900-NEXT: ; use s[8:9] 12752; GFX900-NEXT: ;;#ASMEND 12753; GFX900-NEXT: s_setpc_b64 s[30:31] 12754; 12755; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_4: 12756; GFX90A: ; %bb.0: 12757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12758; GFX90A-NEXT: ;;#ASMSTART 12759; GFX90A-NEXT: ; def s[4:5] 12760; GFX90A-NEXT: ;;#ASMEND 12761; GFX90A-NEXT: ;;#ASMSTART 12762; GFX90A-NEXT: ; def s[6:7] 12763; GFX90A-NEXT: ;;#ASMEND 12764; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 12765; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12766; GFX90A-NEXT: s_mov_b32 s9, s6 12767; GFX90A-NEXT: ;;#ASMSTART 12768; GFX90A-NEXT: ; use s[8:9] 12769; GFX90A-NEXT: ;;#ASMEND 12770; GFX90A-NEXT: s_setpc_b64 s[30:31] 12771; 12772; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_4: 12773; GFX940: ; %bb.0: 12774; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12775; GFX940-NEXT: ;;#ASMSTART 12776; GFX940-NEXT: ; def s[0:1] 12777; GFX940-NEXT: ;;#ASMEND 12778; GFX940-NEXT: ;;#ASMSTART 12779; GFX940-NEXT: ; def s[2:3] 12780; GFX940-NEXT: ;;#ASMEND 12781; GFX940-NEXT: s_lshr_b32 s1, s3, 16 12782; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12783; GFX940-NEXT: s_mov_b32 s9, s2 12784; GFX940-NEXT: ;;#ASMSTART 12785; GFX940-NEXT: ; use s[8:9] 12786; GFX940-NEXT: ;;#ASMEND 12787; GFX940-NEXT: s_setpc_b64 s[30:31] 12788 %vec0 = call <4 x half> asm "; def $0", "=s"() 12789 %vec1 = call <4 x half> asm "; def $0", "=s"() 12790 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 4> 12791 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12792 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12793 ret void 12794} 12795 12796define void @s_shuffle_v3f16_v4f16__7_1_4() { 12797; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_4: 12798; GFX900: ; %bb.0: 12799; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12800; GFX900-NEXT: ;;#ASMSTART 12801; GFX900-NEXT: ; def s[4:5] 12802; GFX900-NEXT: ;;#ASMEND 12803; GFX900-NEXT: ;;#ASMSTART 12804; GFX900-NEXT: ; def s[6:7] 12805; GFX900-NEXT: ;;#ASMEND 12806; GFX900-NEXT: s_lshr_b32 s4, s4, 16 12807; GFX900-NEXT: s_lshr_b32 s5, s7, 16 12808; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12809; GFX900-NEXT: s_mov_b32 s9, s6 12810; GFX900-NEXT: ;;#ASMSTART 12811; GFX900-NEXT: ; use s[8:9] 12812; GFX900-NEXT: ;;#ASMEND 12813; GFX900-NEXT: s_setpc_b64 s[30:31] 12814; 12815; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_4: 12816; GFX90A: ; %bb.0: 12817; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12818; GFX90A-NEXT: ;;#ASMSTART 12819; GFX90A-NEXT: ; def s[4:5] 12820; GFX90A-NEXT: ;;#ASMEND 12821; GFX90A-NEXT: ;;#ASMSTART 12822; GFX90A-NEXT: ; def s[6:7] 12823; GFX90A-NEXT: ;;#ASMEND 12824; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 12825; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 12826; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12827; GFX90A-NEXT: s_mov_b32 s9, s6 12828; GFX90A-NEXT: ;;#ASMSTART 12829; GFX90A-NEXT: ; use s[8:9] 12830; GFX90A-NEXT: ;;#ASMEND 12831; GFX90A-NEXT: s_setpc_b64 s[30:31] 12832; 12833; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_4: 12834; GFX940: ; %bb.0: 12835; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12836; GFX940-NEXT: ;;#ASMSTART 12837; GFX940-NEXT: ; def s[0:1] 12838; GFX940-NEXT: ;;#ASMEND 12839; GFX940-NEXT: ;;#ASMSTART 12840; GFX940-NEXT: ; def s[2:3] 12841; GFX940-NEXT: ;;#ASMEND 12842; GFX940-NEXT: s_lshr_b32 s0, s0, 16 12843; GFX940-NEXT: s_lshr_b32 s1, s3, 16 12844; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12845; GFX940-NEXT: s_mov_b32 s9, s2 12846; GFX940-NEXT: ;;#ASMSTART 12847; GFX940-NEXT: ; use s[8:9] 12848; GFX940-NEXT: ;;#ASMEND 12849; GFX940-NEXT: s_setpc_b64 s[30:31] 12850 %vec0 = call <4 x half> asm "; def $0", "=s"() 12851 %vec1 = call <4 x half> asm "; def $0", "=s"() 12852 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 4> 12853 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12854 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12855 ret void 12856} 12857 12858define void @s_shuffle_v3f16_v4f16__7_2_4() { 12859; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_4: 12860; GFX900: ; %bb.0: 12861; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12862; GFX900-NEXT: ;;#ASMSTART 12863; GFX900-NEXT: ; def s[4:5] 12864; GFX900-NEXT: ;;#ASMEND 12865; GFX900-NEXT: ;;#ASMSTART 12866; GFX900-NEXT: ; def s[6:7] 12867; GFX900-NEXT: ;;#ASMEND 12868; GFX900-NEXT: s_lshr_b32 s4, s7, 16 12869; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12870; GFX900-NEXT: s_mov_b32 s9, s6 12871; GFX900-NEXT: ;;#ASMSTART 12872; GFX900-NEXT: ; use s[8:9] 12873; GFX900-NEXT: ;;#ASMEND 12874; GFX900-NEXT: s_setpc_b64 s[30:31] 12875; 12876; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_4: 12877; GFX90A: ; %bb.0: 12878; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12879; GFX90A-NEXT: ;;#ASMSTART 12880; GFX90A-NEXT: ; def s[4:5] 12881; GFX90A-NEXT: ;;#ASMEND 12882; GFX90A-NEXT: ;;#ASMSTART 12883; GFX90A-NEXT: ; def s[6:7] 12884; GFX90A-NEXT: ;;#ASMEND 12885; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 12886; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 12887; GFX90A-NEXT: s_mov_b32 s9, s6 12888; GFX90A-NEXT: ;;#ASMSTART 12889; GFX90A-NEXT: ; use s[8:9] 12890; GFX90A-NEXT: ;;#ASMEND 12891; GFX90A-NEXT: s_setpc_b64 s[30:31] 12892; 12893; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_4: 12894; GFX940: ; %bb.0: 12895; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12896; GFX940-NEXT: ;;#ASMSTART 12897; GFX940-NEXT: ; def s[0:1] 12898; GFX940-NEXT: ;;#ASMEND 12899; GFX940-NEXT: ;;#ASMSTART 12900; GFX940-NEXT: ; def s[2:3] 12901; GFX940-NEXT: ;;#ASMEND 12902; GFX940-NEXT: s_lshr_b32 s0, s3, 16 12903; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 12904; GFX940-NEXT: s_mov_b32 s9, s2 12905; GFX940-NEXT: ;;#ASMSTART 12906; GFX940-NEXT: ; use s[8:9] 12907; GFX940-NEXT: ;;#ASMEND 12908; GFX940-NEXT: s_setpc_b64 s[30:31] 12909 %vec0 = call <4 x half> asm "; def $0", "=s"() 12910 %vec1 = call <4 x half> asm "; def $0", "=s"() 12911 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 4> 12912 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12913 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12914 ret void 12915} 12916 12917define void @s_shuffle_v3f16_v4f16__7_3_4() { 12918; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_4: 12919; GFX900: ; %bb.0: 12920; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12921; GFX900-NEXT: ;;#ASMSTART 12922; GFX900-NEXT: ; def s[4:5] 12923; GFX900-NEXT: ;;#ASMEND 12924; GFX900-NEXT: ;;#ASMSTART 12925; GFX900-NEXT: ; def s[6:7] 12926; GFX900-NEXT: ;;#ASMEND 12927; GFX900-NEXT: s_lshr_b32 s4, s5, 16 12928; GFX900-NEXT: s_lshr_b32 s5, s7, 16 12929; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12930; GFX900-NEXT: s_mov_b32 s9, s6 12931; GFX900-NEXT: ;;#ASMSTART 12932; GFX900-NEXT: ; use s[8:9] 12933; GFX900-NEXT: ;;#ASMEND 12934; GFX900-NEXT: s_setpc_b64 s[30:31] 12935; 12936; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_4: 12937; GFX90A: ; %bb.0: 12938; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12939; GFX90A-NEXT: ;;#ASMSTART 12940; GFX90A-NEXT: ; def s[4:5] 12941; GFX90A-NEXT: ;;#ASMEND 12942; GFX90A-NEXT: ;;#ASMSTART 12943; GFX90A-NEXT: ; def s[6:7] 12944; GFX90A-NEXT: ;;#ASMEND 12945; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 12946; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 12947; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 12948; GFX90A-NEXT: s_mov_b32 s9, s6 12949; GFX90A-NEXT: ;;#ASMSTART 12950; GFX90A-NEXT: ; use s[8:9] 12951; GFX90A-NEXT: ;;#ASMEND 12952; GFX90A-NEXT: s_setpc_b64 s[30:31] 12953; 12954; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_4: 12955; GFX940: ; %bb.0: 12956; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12957; GFX940-NEXT: ;;#ASMSTART 12958; GFX940-NEXT: ; def s[0:1] 12959; GFX940-NEXT: ;;#ASMEND 12960; GFX940-NEXT: ;;#ASMSTART 12961; GFX940-NEXT: ; def s[2:3] 12962; GFX940-NEXT: ;;#ASMEND 12963; GFX940-NEXT: s_lshr_b32 s0, s1, 16 12964; GFX940-NEXT: s_lshr_b32 s1, s3, 16 12965; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 12966; GFX940-NEXT: s_mov_b32 s9, s2 12967; GFX940-NEXT: ;;#ASMSTART 12968; GFX940-NEXT: ; use s[8:9] 12969; GFX940-NEXT: ;;#ASMEND 12970; GFX940-NEXT: s_setpc_b64 s[30:31] 12971 %vec0 = call <4 x half> asm "; def $0", "=s"() 12972 %vec1 = call <4 x half> asm "; def $0", "=s"() 12973 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 4> 12974 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 12975 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 12976 ret void 12977} 12978 12979define void @s_shuffle_v3f16_v4f16__7_5_4() { 12980; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_4: 12981; GFX900: ; %bb.0: 12982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12983; GFX900-NEXT: ;;#ASMSTART 12984; GFX900-NEXT: ; def s[4:5] 12985; GFX900-NEXT: ;;#ASMEND 12986; GFX900-NEXT: s_lshr_b32 s6, s4, 16 12987; GFX900-NEXT: s_lshr_b32 s5, s5, 16 12988; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s6 12989; GFX900-NEXT: s_mov_b32 s9, s4 12990; GFX900-NEXT: ;;#ASMSTART 12991; GFX900-NEXT: ; use s[8:9] 12992; GFX900-NEXT: ;;#ASMEND 12993; GFX900-NEXT: s_setpc_b64 s[30:31] 12994; 12995; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_4: 12996; GFX90A: ; %bb.0: 12997; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 12998; GFX90A-NEXT: ;;#ASMSTART 12999; GFX90A-NEXT: ; def s[4:5] 13000; GFX90A-NEXT: ;;#ASMEND 13001; GFX90A-NEXT: s_lshr_b32 s6, s4, 16 13002; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 13003; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s6 13004; GFX90A-NEXT: s_mov_b32 s9, s4 13005; GFX90A-NEXT: ;;#ASMSTART 13006; GFX90A-NEXT: ; use s[8:9] 13007; GFX90A-NEXT: ;;#ASMEND 13008; GFX90A-NEXT: s_setpc_b64 s[30:31] 13009; 13010; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_4: 13011; GFX940: ; %bb.0: 13012; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13013; GFX940-NEXT: ;;#ASMSTART 13014; GFX940-NEXT: ; def s[0:1] 13015; GFX940-NEXT: ;;#ASMEND 13016; GFX940-NEXT: s_lshr_b32 s2, s0, 16 13017; GFX940-NEXT: s_lshr_b32 s1, s1, 16 13018; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s2 13019; GFX940-NEXT: s_mov_b32 s9, s0 13020; GFX940-NEXT: ;;#ASMSTART 13021; GFX940-NEXT: ; use s[8:9] 13022; GFX940-NEXT: ;;#ASMEND 13023; GFX940-NEXT: s_setpc_b64 s[30:31] 13024 %vec0 = call <4 x half> asm "; def $0", "=s"() 13025 %vec1 = call <4 x half> asm "; def $0", "=s"() 13026 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 4> 13027 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13028 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13029 ret void 13030} 13031 13032define void @s_shuffle_v3f16_v4f16__7_6_4() { 13033; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_4: 13034; GFX900: ; %bb.0: 13035; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13036; GFX900-NEXT: ;;#ASMSTART 13037; GFX900-NEXT: ; def s[4:5] 13038; GFX900-NEXT: ;;#ASMEND 13039; GFX900-NEXT: s_lshr_b32 s6, s5, 16 13040; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13041; GFX900-NEXT: s_mov_b32 s9, s4 13042; GFX900-NEXT: ;;#ASMSTART 13043; GFX900-NEXT: ; use s[8:9] 13044; GFX900-NEXT: ;;#ASMEND 13045; GFX900-NEXT: s_setpc_b64 s[30:31] 13046; 13047; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_4: 13048; GFX90A: ; %bb.0: 13049; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13050; GFX90A-NEXT: ;;#ASMSTART 13051; GFX90A-NEXT: ; def s[4:5] 13052; GFX90A-NEXT: ;;#ASMEND 13053; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 13054; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13055; GFX90A-NEXT: s_mov_b32 s9, s4 13056; GFX90A-NEXT: ;;#ASMSTART 13057; GFX90A-NEXT: ; use s[8:9] 13058; GFX90A-NEXT: ;;#ASMEND 13059; GFX90A-NEXT: s_setpc_b64 s[30:31] 13060; 13061; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_4: 13062; GFX940: ; %bb.0: 13063; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13064; GFX940-NEXT: ;;#ASMSTART 13065; GFX940-NEXT: ; def s[0:1] 13066; GFX940-NEXT: ;;#ASMEND 13067; GFX940-NEXT: s_lshr_b32 s2, s1, 16 13068; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 13069; GFX940-NEXT: s_mov_b32 s9, s0 13070; GFX940-NEXT: ;;#ASMSTART 13071; GFX940-NEXT: ; use s[8:9] 13072; GFX940-NEXT: ;;#ASMEND 13073; GFX940-NEXT: s_setpc_b64 s[30:31] 13074 %vec0 = call <4 x half> asm "; def $0", "=s"() 13075 %vec1 = call <4 x half> asm "; def $0", "=s"() 13076 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 4> 13077 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13078 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13079 ret void 13080} 13081 13082define void @s_shuffle_v3f16_v4f16__u_5_5() { 13083; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_5_5: 13084; GFX9: ; %bb.0: 13085; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13086; GFX9-NEXT: ;;#ASMSTART 13087; GFX9-NEXT: ; def s[8:9] 13088; GFX9-NEXT: ;;#ASMEND 13089; GFX9-NEXT: s_lshr_b32 s9, s8, 16 13090; GFX9-NEXT: ;;#ASMSTART 13091; GFX9-NEXT: ; use s[8:9] 13092; GFX9-NEXT: ;;#ASMEND 13093; GFX9-NEXT: s_setpc_b64 s[30:31] 13094 %vec0 = call <4 x half> asm "; def $0", "=s"() 13095 %vec1 = call <4 x half> asm "; def $0", "=s"() 13096 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 5, i32 5> 13097 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13098 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13099 ret void 13100} 13101 13102define void @s_shuffle_v3f16_v4f16__0_5_5() { 13103; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_5_5: 13104; GFX900: ; %bb.0: 13105; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13106; GFX900-NEXT: ;;#ASMSTART 13107; GFX900-NEXT: ; def s[6:7] 13108; GFX900-NEXT: ;;#ASMEND 13109; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13110; GFX900-NEXT: ;;#ASMSTART 13111; GFX900-NEXT: ; def s[4:5] 13112; GFX900-NEXT: ;;#ASMEND 13113; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13114; GFX900-NEXT: ;;#ASMSTART 13115; GFX900-NEXT: ; use s[8:9] 13116; GFX900-NEXT: ;;#ASMEND 13117; GFX900-NEXT: s_setpc_b64 s[30:31] 13118; 13119; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_5_5: 13120; GFX90A: ; %bb.0: 13121; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13122; GFX90A-NEXT: ;;#ASMSTART 13123; GFX90A-NEXT: ; def s[6:7] 13124; GFX90A-NEXT: ;;#ASMEND 13125; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13126; GFX90A-NEXT: ;;#ASMSTART 13127; GFX90A-NEXT: ; def s[4:5] 13128; GFX90A-NEXT: ;;#ASMEND 13129; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13130; GFX90A-NEXT: ;;#ASMSTART 13131; GFX90A-NEXT: ; use s[8:9] 13132; GFX90A-NEXT: ;;#ASMEND 13133; GFX90A-NEXT: s_setpc_b64 s[30:31] 13134; 13135; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_5_5: 13136; GFX940: ; %bb.0: 13137; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13138; GFX940-NEXT: ;;#ASMSTART 13139; GFX940-NEXT: ; def s[2:3] 13140; GFX940-NEXT: ;;#ASMEND 13141; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13142; GFX940-NEXT: ;;#ASMSTART 13143; GFX940-NEXT: ; def s[0:1] 13144; GFX940-NEXT: ;;#ASMEND 13145; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13146; GFX940-NEXT: ;;#ASMSTART 13147; GFX940-NEXT: ; use s[8:9] 13148; GFX940-NEXT: ;;#ASMEND 13149; GFX940-NEXT: s_setpc_b64 s[30:31] 13150 %vec0 = call <4 x half> asm "; def $0", "=s"() 13151 %vec1 = call <4 x half> asm "; def $0", "=s"() 13152 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 5, i32 5> 13153 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13154 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13155 ret void 13156} 13157 13158define void @s_shuffle_v3f16_v4f16__1_5_5() { 13159; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_5_5: 13160; GFX900: ; %bb.0: 13161; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13162; GFX900-NEXT: ;;#ASMSTART 13163; GFX900-NEXT: ; def s[4:5] 13164; GFX900-NEXT: ;;#ASMEND 13165; GFX900-NEXT: ;;#ASMSTART 13166; GFX900-NEXT: ; def s[6:7] 13167; GFX900-NEXT: ;;#ASMEND 13168; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13169; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13170; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13171; GFX900-NEXT: ;;#ASMSTART 13172; GFX900-NEXT: ; use s[8:9] 13173; GFX900-NEXT: ;;#ASMEND 13174; GFX900-NEXT: s_setpc_b64 s[30:31] 13175; 13176; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_5_5: 13177; GFX90A: ; %bb.0: 13178; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13179; GFX90A-NEXT: ;;#ASMSTART 13180; GFX90A-NEXT: ; def s[4:5] 13181; GFX90A-NEXT: ;;#ASMEND 13182; GFX90A-NEXT: ;;#ASMSTART 13183; GFX90A-NEXT: ; def s[6:7] 13184; GFX90A-NEXT: ;;#ASMEND 13185; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13186; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13187; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13188; GFX90A-NEXT: ;;#ASMSTART 13189; GFX90A-NEXT: ; use s[8:9] 13190; GFX90A-NEXT: ;;#ASMEND 13191; GFX90A-NEXT: s_setpc_b64 s[30:31] 13192; 13193; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_5_5: 13194; GFX940: ; %bb.0: 13195; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13196; GFX940-NEXT: ;;#ASMSTART 13197; GFX940-NEXT: ; def s[0:1] 13198; GFX940-NEXT: ;;#ASMEND 13199; GFX940-NEXT: ;;#ASMSTART 13200; GFX940-NEXT: ; def s[2:3] 13201; GFX940-NEXT: ;;#ASMEND 13202; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13203; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13204; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13205; GFX940-NEXT: ;;#ASMSTART 13206; GFX940-NEXT: ; use s[8:9] 13207; GFX940-NEXT: ;;#ASMEND 13208; GFX940-NEXT: s_setpc_b64 s[30:31] 13209 %vec0 = call <4 x half> asm "; def $0", "=s"() 13210 %vec1 = call <4 x half> asm "; def $0", "=s"() 13211 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 5, i32 5> 13212 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13213 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13214 ret void 13215} 13216 13217define void @s_shuffle_v3f16_v4f16__2_5_5() { 13218; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_5_5: 13219; GFX900: ; %bb.0: 13220; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13221; GFX900-NEXT: ;;#ASMSTART 13222; GFX900-NEXT: ; def s[6:7] 13223; GFX900-NEXT: ;;#ASMEND 13224; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13225; GFX900-NEXT: ;;#ASMSTART 13226; GFX900-NEXT: ; def s[4:5] 13227; GFX900-NEXT: ;;#ASMEND 13228; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13229; GFX900-NEXT: ;;#ASMSTART 13230; GFX900-NEXT: ; use s[8:9] 13231; GFX900-NEXT: ;;#ASMEND 13232; GFX900-NEXT: s_setpc_b64 s[30:31] 13233; 13234; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_5_5: 13235; GFX90A: ; %bb.0: 13236; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13237; GFX90A-NEXT: ;;#ASMSTART 13238; GFX90A-NEXT: ; def s[6:7] 13239; GFX90A-NEXT: ;;#ASMEND 13240; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13241; GFX90A-NEXT: ;;#ASMSTART 13242; GFX90A-NEXT: ; def s[4:5] 13243; GFX90A-NEXT: ;;#ASMEND 13244; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13245; GFX90A-NEXT: ;;#ASMSTART 13246; GFX90A-NEXT: ; use s[8:9] 13247; GFX90A-NEXT: ;;#ASMEND 13248; GFX90A-NEXT: s_setpc_b64 s[30:31] 13249; 13250; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_5_5: 13251; GFX940: ; %bb.0: 13252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13253; GFX940-NEXT: ;;#ASMSTART 13254; GFX940-NEXT: ; def s[2:3] 13255; GFX940-NEXT: ;;#ASMEND 13256; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13257; GFX940-NEXT: ;;#ASMSTART 13258; GFX940-NEXT: ; def s[0:1] 13259; GFX940-NEXT: ;;#ASMEND 13260; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 13261; GFX940-NEXT: ;;#ASMSTART 13262; GFX940-NEXT: ; use s[8:9] 13263; GFX940-NEXT: ;;#ASMEND 13264; GFX940-NEXT: s_setpc_b64 s[30:31] 13265 %vec0 = call <4 x half> asm "; def $0", "=s"() 13266 %vec1 = call <4 x half> asm "; def $0", "=s"() 13267 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 5, i32 5> 13268 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13269 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13270 ret void 13271} 13272 13273define void @s_shuffle_v3f16_v4f16__3_5_5() { 13274; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_5_5: 13275; GFX900: ; %bb.0: 13276; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13277; GFX900-NEXT: ;;#ASMSTART 13278; GFX900-NEXT: ; def s[4:5] 13279; GFX900-NEXT: ;;#ASMEND 13280; GFX900-NEXT: ;;#ASMSTART 13281; GFX900-NEXT: ; def s[6:7] 13282; GFX900-NEXT: ;;#ASMEND 13283; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13284; GFX900-NEXT: s_lshr_b32 s4, s5, 16 13285; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13286; GFX900-NEXT: ;;#ASMSTART 13287; GFX900-NEXT: ; use s[8:9] 13288; GFX900-NEXT: ;;#ASMEND 13289; GFX900-NEXT: s_setpc_b64 s[30:31] 13290; 13291; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_5_5: 13292; GFX90A: ; %bb.0: 13293; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13294; GFX90A-NEXT: ;;#ASMSTART 13295; GFX90A-NEXT: ; def s[4:5] 13296; GFX90A-NEXT: ;;#ASMEND 13297; GFX90A-NEXT: ;;#ASMSTART 13298; GFX90A-NEXT: ; def s[6:7] 13299; GFX90A-NEXT: ;;#ASMEND 13300; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13301; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 13302; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13303; GFX90A-NEXT: ;;#ASMSTART 13304; GFX90A-NEXT: ; use s[8:9] 13305; GFX90A-NEXT: ;;#ASMEND 13306; GFX90A-NEXT: s_setpc_b64 s[30:31] 13307; 13308; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_5_5: 13309; GFX940: ; %bb.0: 13310; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13311; GFX940-NEXT: ;;#ASMSTART 13312; GFX940-NEXT: ; def s[0:1] 13313; GFX940-NEXT: ;;#ASMEND 13314; GFX940-NEXT: ;;#ASMSTART 13315; GFX940-NEXT: ; def s[2:3] 13316; GFX940-NEXT: ;;#ASMEND 13317; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13318; GFX940-NEXT: s_lshr_b32 s0, s1, 16 13319; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13320; GFX940-NEXT: ;;#ASMSTART 13321; GFX940-NEXT: ; use s[8:9] 13322; GFX940-NEXT: ;;#ASMEND 13323; GFX940-NEXT: s_setpc_b64 s[30:31] 13324 %vec0 = call <4 x half> asm "; def $0", "=s"() 13325 %vec1 = call <4 x half> asm "; def $0", "=s"() 13326 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 5, i32 5> 13327 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13328 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13329 ret void 13330} 13331 13332define void @s_shuffle_v3f16_v4f16__4_5_5() { 13333; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_5_5: 13334; GFX9: ; %bb.0: 13335; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13336; GFX9-NEXT: ;;#ASMSTART 13337; GFX9-NEXT: ; def s[8:9] 13338; GFX9-NEXT: ;;#ASMEND 13339; GFX9-NEXT: s_lshr_b32 s9, s8, 16 13340; GFX9-NEXT: ;;#ASMSTART 13341; GFX9-NEXT: ; use s[8:9] 13342; GFX9-NEXT: ;;#ASMEND 13343; GFX9-NEXT: s_setpc_b64 s[30:31] 13344 %vec0 = call <4 x half> asm "; def $0", "=s"() 13345 %vec1 = call <4 x half> asm "; def $0", "=s"() 13346 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 5, i32 5> 13347 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13348 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13349 ret void 13350} 13351 13352define void @s_shuffle_v3f16_v4f16__5_5_5() { 13353; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_5_5: 13354; GFX900: ; %bb.0: 13355; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13356; GFX900-NEXT: ;;#ASMSTART 13357; GFX900-NEXT: ; def s[4:5] 13358; GFX900-NEXT: ;;#ASMEND 13359; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13360; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s9 13361; GFX900-NEXT: ;;#ASMSTART 13362; GFX900-NEXT: ; use s[8:9] 13363; GFX900-NEXT: ;;#ASMEND 13364; GFX900-NEXT: s_setpc_b64 s[30:31] 13365; 13366; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_5_5: 13367; GFX90A: ; %bb.0: 13368; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13369; GFX90A-NEXT: ;;#ASMSTART 13370; GFX90A-NEXT: ; def s[4:5] 13371; GFX90A-NEXT: ;;#ASMEND 13372; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13373; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s9 13374; GFX90A-NEXT: ;;#ASMSTART 13375; GFX90A-NEXT: ; use s[8:9] 13376; GFX90A-NEXT: ;;#ASMEND 13377; GFX90A-NEXT: s_setpc_b64 s[30:31] 13378; 13379; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_5_5: 13380; GFX940: ; %bb.0: 13381; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13382; GFX940-NEXT: ;;#ASMSTART 13383; GFX940-NEXT: ; def s[0:1] 13384; GFX940-NEXT: ;;#ASMEND 13385; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13386; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s9 13387; GFX940-NEXT: ;;#ASMSTART 13388; GFX940-NEXT: ; use s[8:9] 13389; GFX940-NEXT: ;;#ASMEND 13390; GFX940-NEXT: s_setpc_b64 s[30:31] 13391 %vec0 = call <4 x half> asm "; def $0", "=s"() 13392 %vec1 = call <4 x half> asm "; def $0", "=s"() 13393 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 5, i32 5> 13394 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13396 ret void 13397} 13398 13399define void @s_shuffle_v3f16_v4f16__6_5_5() { 13400; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_5_5: 13401; GFX900: ; %bb.0: 13402; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13403; GFX900-NEXT: ;;#ASMSTART 13404; GFX900-NEXT: ; def s[4:5] 13405; GFX900-NEXT: ;;#ASMEND 13406; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13407; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13408; GFX900-NEXT: ;;#ASMSTART 13409; GFX900-NEXT: ; use s[8:9] 13410; GFX900-NEXT: ;;#ASMEND 13411; GFX900-NEXT: s_setpc_b64 s[30:31] 13412; 13413; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_5_5: 13414; GFX90A: ; %bb.0: 13415; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13416; GFX90A-NEXT: ;;#ASMSTART 13417; GFX90A-NEXT: ; def s[4:5] 13418; GFX90A-NEXT: ;;#ASMEND 13419; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13420; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 13421; GFX90A-NEXT: ;;#ASMSTART 13422; GFX90A-NEXT: ; use s[8:9] 13423; GFX90A-NEXT: ;;#ASMEND 13424; GFX90A-NEXT: s_setpc_b64 s[30:31] 13425; 13426; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_5_5: 13427; GFX940: ; %bb.0: 13428; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13429; GFX940-NEXT: ;;#ASMSTART 13430; GFX940-NEXT: ; def s[0:1] 13431; GFX940-NEXT: ;;#ASMEND 13432; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13433; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 13434; GFX940-NEXT: ;;#ASMSTART 13435; GFX940-NEXT: ; use s[8:9] 13436; GFX940-NEXT: ;;#ASMEND 13437; GFX940-NEXT: s_setpc_b64 s[30:31] 13438 %vec0 = call <4 x half> asm "; def $0", "=s"() 13439 %vec1 = call <4 x half> asm "; def $0", "=s"() 13440 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 5, i32 5> 13441 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13442 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13443 ret void 13444} 13445 13446define void @s_shuffle_v3f16_v4f16__7_5_5() { 13447; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_5: 13448; GFX900: ; %bb.0: 13449; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13450; GFX900-NEXT: ;;#ASMSTART 13451; GFX900-NEXT: ; def s[4:5] 13452; GFX900-NEXT: ;;#ASMEND 13453; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13454; GFX900-NEXT: s_lshr_b32 s4, s5, 16 13455; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13456; GFX900-NEXT: ;;#ASMSTART 13457; GFX900-NEXT: ; use s[8:9] 13458; GFX900-NEXT: ;;#ASMEND 13459; GFX900-NEXT: s_setpc_b64 s[30:31] 13460; 13461; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_5: 13462; GFX90A: ; %bb.0: 13463; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13464; GFX90A-NEXT: ;;#ASMSTART 13465; GFX90A-NEXT: ; def s[4:5] 13466; GFX90A-NEXT: ;;#ASMEND 13467; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13468; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 13469; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13470; GFX90A-NEXT: ;;#ASMSTART 13471; GFX90A-NEXT: ; use s[8:9] 13472; GFX90A-NEXT: ;;#ASMEND 13473; GFX90A-NEXT: s_setpc_b64 s[30:31] 13474; 13475; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_5: 13476; GFX940: ; %bb.0: 13477; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13478; GFX940-NEXT: ;;#ASMSTART 13479; GFX940-NEXT: ; def s[0:1] 13480; GFX940-NEXT: ;;#ASMEND 13481; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13482; GFX940-NEXT: s_lshr_b32 s0, s1, 16 13483; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13484; GFX940-NEXT: ;;#ASMSTART 13485; GFX940-NEXT: ; use s[8:9] 13486; GFX940-NEXT: ;;#ASMEND 13487; GFX940-NEXT: s_setpc_b64 s[30:31] 13488 %vec0 = call <4 x half> asm "; def $0", "=s"() 13489 %vec1 = call <4 x half> asm "; def $0", "=s"() 13490 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 5> 13491 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13492 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13493 ret void 13494} 13495 13496define void @s_shuffle_v3f16_v4f16__7_u_5() { 13497; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_5: 13498; GFX900: ; %bb.0: 13499; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13500; GFX900-NEXT: ;;#ASMSTART 13501; GFX900-NEXT: ; def s[4:5] 13502; GFX900-NEXT: ;;#ASMEND 13503; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13504; GFX900-NEXT: s_lshr_b32 s8, s5, 16 13505; GFX900-NEXT: ;;#ASMSTART 13506; GFX900-NEXT: ; use s[8:9] 13507; GFX900-NEXT: ;;#ASMEND 13508; GFX900-NEXT: s_setpc_b64 s[30:31] 13509; 13510; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_5: 13511; GFX90A: ; %bb.0: 13512; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13513; GFX90A-NEXT: ;;#ASMSTART 13514; GFX90A-NEXT: ; def s[4:5] 13515; GFX90A-NEXT: ;;#ASMEND 13516; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13517; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 13518; GFX90A-NEXT: ;;#ASMSTART 13519; GFX90A-NEXT: ; use s[8:9] 13520; GFX90A-NEXT: ;;#ASMEND 13521; GFX90A-NEXT: s_setpc_b64 s[30:31] 13522; 13523; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_5: 13524; GFX940: ; %bb.0: 13525; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13526; GFX940-NEXT: ;;#ASMSTART 13527; GFX940-NEXT: ; def s[0:1] 13528; GFX940-NEXT: ;;#ASMEND 13529; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13530; GFX940-NEXT: s_lshr_b32 s8, s1, 16 13531; GFX940-NEXT: ;;#ASMSTART 13532; GFX940-NEXT: ; use s[8:9] 13533; GFX940-NEXT: ;;#ASMEND 13534; GFX940-NEXT: s_setpc_b64 s[30:31] 13535 %vec0 = call <4 x half> asm "; def $0", "=s"() 13536 %vec1 = call <4 x half> asm "; def $0", "=s"() 13537 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 5> 13538 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13539 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13540 ret void 13541} 13542 13543define void @s_shuffle_v3f16_v4f16__7_0_5() { 13544; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_5: 13545; GFX900: ; %bb.0: 13546; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13547; GFX900-NEXT: ;;#ASMSTART 13548; GFX900-NEXT: ; def s[4:5] 13549; GFX900-NEXT: ;;#ASMEND 13550; GFX900-NEXT: ;;#ASMSTART 13551; GFX900-NEXT: ; def s[6:7] 13552; GFX900-NEXT: ;;#ASMEND 13553; GFX900-NEXT: s_lshr_b32 s5, s7, 16 13554; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13555; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13556; GFX900-NEXT: ;;#ASMSTART 13557; GFX900-NEXT: ; use s[8:9] 13558; GFX900-NEXT: ;;#ASMEND 13559; GFX900-NEXT: s_setpc_b64 s[30:31] 13560; 13561; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_5: 13562; GFX90A: ; %bb.0: 13563; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13564; GFX90A-NEXT: ;;#ASMSTART 13565; GFX90A-NEXT: ; def s[4:5] 13566; GFX90A-NEXT: ;;#ASMEND 13567; GFX90A-NEXT: ;;#ASMSTART 13568; GFX90A-NEXT: ; def s[6:7] 13569; GFX90A-NEXT: ;;#ASMEND 13570; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 13571; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13572; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13573; GFX90A-NEXT: ;;#ASMSTART 13574; GFX90A-NEXT: ; use s[8:9] 13575; GFX90A-NEXT: ;;#ASMEND 13576; GFX90A-NEXT: s_setpc_b64 s[30:31] 13577; 13578; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_5: 13579; GFX940: ; %bb.0: 13580; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13581; GFX940-NEXT: ;;#ASMSTART 13582; GFX940-NEXT: ; def s[0:1] 13583; GFX940-NEXT: ;;#ASMEND 13584; GFX940-NEXT: ;;#ASMSTART 13585; GFX940-NEXT: ; def s[2:3] 13586; GFX940-NEXT: ;;#ASMEND 13587; GFX940-NEXT: s_lshr_b32 s1, s3, 16 13588; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13589; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13590; GFX940-NEXT: ;;#ASMSTART 13591; GFX940-NEXT: ; use s[8:9] 13592; GFX940-NEXT: ;;#ASMEND 13593; GFX940-NEXT: s_setpc_b64 s[30:31] 13594 %vec0 = call <4 x half> asm "; def $0", "=s"() 13595 %vec1 = call <4 x half> asm "; def $0", "=s"() 13596 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 5> 13597 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13598 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13599 ret void 13600} 13601 13602define void @s_shuffle_v3f16_v4f16__7_1_5() { 13603; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_5: 13604; GFX900: ; %bb.0: 13605; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13606; GFX900-NEXT: ;;#ASMSTART 13607; GFX900-NEXT: ; def s[4:5] 13608; GFX900-NEXT: ;;#ASMEND 13609; GFX900-NEXT: ;;#ASMSTART 13610; GFX900-NEXT: ; def s[6:7] 13611; GFX900-NEXT: ;;#ASMEND 13612; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13613; GFX900-NEXT: s_lshr_b32 s5, s7, 16 13614; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13615; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13616; GFX900-NEXT: ;;#ASMSTART 13617; GFX900-NEXT: ; use s[8:9] 13618; GFX900-NEXT: ;;#ASMEND 13619; GFX900-NEXT: s_setpc_b64 s[30:31] 13620; 13621; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_5: 13622; GFX90A: ; %bb.0: 13623; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13624; GFX90A-NEXT: ;;#ASMSTART 13625; GFX90A-NEXT: ; def s[4:5] 13626; GFX90A-NEXT: ;;#ASMEND 13627; GFX90A-NEXT: ;;#ASMSTART 13628; GFX90A-NEXT: ; def s[6:7] 13629; GFX90A-NEXT: ;;#ASMEND 13630; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13631; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 13632; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13633; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13634; GFX90A-NEXT: ;;#ASMSTART 13635; GFX90A-NEXT: ; use s[8:9] 13636; GFX90A-NEXT: ;;#ASMEND 13637; GFX90A-NEXT: s_setpc_b64 s[30:31] 13638; 13639; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_5: 13640; GFX940: ; %bb.0: 13641; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13642; GFX940-NEXT: ;;#ASMSTART 13643; GFX940-NEXT: ; def s[0:1] 13644; GFX940-NEXT: ;;#ASMEND 13645; GFX940-NEXT: ;;#ASMSTART 13646; GFX940-NEXT: ; def s[2:3] 13647; GFX940-NEXT: ;;#ASMEND 13648; GFX940-NEXT: s_lshr_b32 s0, s0, 16 13649; GFX940-NEXT: s_lshr_b32 s1, s3, 16 13650; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13651; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13652; GFX940-NEXT: ;;#ASMSTART 13653; GFX940-NEXT: ; use s[8:9] 13654; GFX940-NEXT: ;;#ASMEND 13655; GFX940-NEXT: s_setpc_b64 s[30:31] 13656 %vec0 = call <4 x half> asm "; def $0", "=s"() 13657 %vec1 = call <4 x half> asm "; def $0", "=s"() 13658 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 5> 13659 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13660 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13661 ret void 13662} 13663 13664define void @s_shuffle_v3f16_v4f16__7_2_5() { 13665; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_5: 13666; GFX900: ; %bb.0: 13667; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13668; GFX900-NEXT: ;;#ASMSTART 13669; GFX900-NEXT: ; def s[4:5] 13670; GFX900-NEXT: ;;#ASMEND 13671; GFX900-NEXT: ;;#ASMSTART 13672; GFX900-NEXT: ; def s[6:7] 13673; GFX900-NEXT: ;;#ASMEND 13674; GFX900-NEXT: s_lshr_b32 s4, s7, 16 13675; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13676; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13677; GFX900-NEXT: ;;#ASMSTART 13678; GFX900-NEXT: ; use s[8:9] 13679; GFX900-NEXT: ;;#ASMEND 13680; GFX900-NEXT: s_setpc_b64 s[30:31] 13681; 13682; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_5: 13683; GFX90A: ; %bb.0: 13684; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13685; GFX90A-NEXT: ;;#ASMSTART 13686; GFX90A-NEXT: ; def s[4:5] 13687; GFX90A-NEXT: ;;#ASMEND 13688; GFX90A-NEXT: ;;#ASMSTART 13689; GFX90A-NEXT: ; def s[6:7] 13690; GFX90A-NEXT: ;;#ASMEND 13691; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 13692; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 13693; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13694; GFX90A-NEXT: ;;#ASMSTART 13695; GFX90A-NEXT: ; use s[8:9] 13696; GFX90A-NEXT: ;;#ASMEND 13697; GFX90A-NEXT: s_setpc_b64 s[30:31] 13698; 13699; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_5: 13700; GFX940: ; %bb.0: 13701; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13702; GFX940-NEXT: ;;#ASMSTART 13703; GFX940-NEXT: ; def s[0:1] 13704; GFX940-NEXT: ;;#ASMEND 13705; GFX940-NEXT: ;;#ASMSTART 13706; GFX940-NEXT: ; def s[2:3] 13707; GFX940-NEXT: ;;#ASMEND 13708; GFX940-NEXT: s_lshr_b32 s0, s3, 16 13709; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 13710; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13711; GFX940-NEXT: ;;#ASMSTART 13712; GFX940-NEXT: ; use s[8:9] 13713; GFX940-NEXT: ;;#ASMEND 13714; GFX940-NEXT: s_setpc_b64 s[30:31] 13715 %vec0 = call <4 x half> asm "; def $0", "=s"() 13716 %vec1 = call <4 x half> asm "; def $0", "=s"() 13717 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 5> 13718 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13719 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13720 ret void 13721} 13722 13723define void @s_shuffle_v3f16_v4f16__7_3_5() { 13724; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_5: 13725; GFX900: ; %bb.0: 13726; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13727; GFX900-NEXT: ;;#ASMSTART 13728; GFX900-NEXT: ; def s[4:5] 13729; GFX900-NEXT: ;;#ASMEND 13730; GFX900-NEXT: ;;#ASMSTART 13731; GFX900-NEXT: ; def s[6:7] 13732; GFX900-NEXT: ;;#ASMEND 13733; GFX900-NEXT: s_lshr_b32 s4, s5, 16 13734; GFX900-NEXT: s_lshr_b32 s5, s7, 16 13735; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13736; GFX900-NEXT: s_lshr_b32 s9, s6, 16 13737; GFX900-NEXT: ;;#ASMSTART 13738; GFX900-NEXT: ; use s[8:9] 13739; GFX900-NEXT: ;;#ASMEND 13740; GFX900-NEXT: s_setpc_b64 s[30:31] 13741; 13742; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_5: 13743; GFX90A: ; %bb.0: 13744; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13745; GFX90A-NEXT: ;;#ASMSTART 13746; GFX90A-NEXT: ; def s[4:5] 13747; GFX90A-NEXT: ;;#ASMEND 13748; GFX90A-NEXT: ;;#ASMSTART 13749; GFX90A-NEXT: ; def s[6:7] 13750; GFX90A-NEXT: ;;#ASMEND 13751; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 13752; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 13753; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13754; GFX90A-NEXT: s_lshr_b32 s9, s6, 16 13755; GFX90A-NEXT: ;;#ASMSTART 13756; GFX90A-NEXT: ; use s[8:9] 13757; GFX90A-NEXT: ;;#ASMEND 13758; GFX90A-NEXT: s_setpc_b64 s[30:31] 13759; 13760; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_5: 13761; GFX940: ; %bb.0: 13762; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13763; GFX940-NEXT: ;;#ASMSTART 13764; GFX940-NEXT: ; def s[0:1] 13765; GFX940-NEXT: ;;#ASMEND 13766; GFX940-NEXT: ;;#ASMSTART 13767; GFX940-NEXT: ; def s[2:3] 13768; GFX940-NEXT: ;;#ASMEND 13769; GFX940-NEXT: s_lshr_b32 s0, s1, 16 13770; GFX940-NEXT: s_lshr_b32 s1, s3, 16 13771; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13772; GFX940-NEXT: s_lshr_b32 s9, s2, 16 13773; GFX940-NEXT: ;;#ASMSTART 13774; GFX940-NEXT: ; use s[8:9] 13775; GFX940-NEXT: ;;#ASMEND 13776; GFX940-NEXT: s_setpc_b64 s[30:31] 13777 %vec0 = call <4 x half> asm "; def $0", "=s"() 13778 %vec1 = call <4 x half> asm "; def $0", "=s"() 13779 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 5> 13780 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13781 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13782 ret void 13783} 13784 13785define void @s_shuffle_v3f16_v4f16__7_4_5() { 13786; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_5: 13787; GFX900: ; %bb.0: 13788; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13789; GFX900-NEXT: ;;#ASMSTART 13790; GFX900-NEXT: ; def s[4:5] 13791; GFX900-NEXT: ;;#ASMEND 13792; GFX900-NEXT: s_lshr_b32 s5, s5, 16 13793; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13794; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13795; GFX900-NEXT: ;;#ASMSTART 13796; GFX900-NEXT: ; use s[8:9] 13797; GFX900-NEXT: ;;#ASMEND 13798; GFX900-NEXT: s_setpc_b64 s[30:31] 13799; 13800; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_5: 13801; GFX90A: ; %bb.0: 13802; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13803; GFX90A-NEXT: ;;#ASMSTART 13804; GFX90A-NEXT: ; def s[4:5] 13805; GFX90A-NEXT: ;;#ASMEND 13806; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 13807; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 13808; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13809; GFX90A-NEXT: ;;#ASMSTART 13810; GFX90A-NEXT: ; use s[8:9] 13811; GFX90A-NEXT: ;;#ASMEND 13812; GFX90A-NEXT: s_setpc_b64 s[30:31] 13813; 13814; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_5: 13815; GFX940: ; %bb.0: 13816; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13817; GFX940-NEXT: ;;#ASMSTART 13818; GFX940-NEXT: ; def s[0:1] 13819; GFX940-NEXT: ;;#ASMEND 13820; GFX940-NEXT: s_lshr_b32 s1, s1, 16 13821; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 13822; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13823; GFX940-NEXT: ;;#ASMSTART 13824; GFX940-NEXT: ; use s[8:9] 13825; GFX940-NEXT: ;;#ASMEND 13826; GFX940-NEXT: s_setpc_b64 s[30:31] 13827 %vec0 = call <4 x half> asm "; def $0", "=s"() 13828 %vec1 = call <4 x half> asm "; def $0", "=s"() 13829 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 5> 13830 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13831 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13832 ret void 13833} 13834 13835define void @s_shuffle_v3f16_v4f16__7_6_5() { 13836; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_5: 13837; GFX900: ; %bb.0: 13838; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13839; GFX900-NEXT: ;;#ASMSTART 13840; GFX900-NEXT: ; def s[4:5] 13841; GFX900-NEXT: ;;#ASMEND 13842; GFX900-NEXT: s_lshr_b32 s6, s5, 16 13843; GFX900-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13844; GFX900-NEXT: s_lshr_b32 s9, s4, 16 13845; GFX900-NEXT: ;;#ASMSTART 13846; GFX900-NEXT: ; use s[8:9] 13847; GFX900-NEXT: ;;#ASMEND 13848; GFX900-NEXT: s_setpc_b64 s[30:31] 13849; 13850; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_5: 13851; GFX90A: ; %bb.0: 13852; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13853; GFX90A-NEXT: ;;#ASMSTART 13854; GFX90A-NEXT: ; def s[4:5] 13855; GFX90A-NEXT: ;;#ASMEND 13856; GFX90A-NEXT: s_lshr_b32 s6, s5, 16 13857; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s6, s5 13858; GFX90A-NEXT: s_lshr_b32 s9, s4, 16 13859; GFX90A-NEXT: ;;#ASMSTART 13860; GFX90A-NEXT: ; use s[8:9] 13861; GFX90A-NEXT: ;;#ASMEND 13862; GFX90A-NEXT: s_setpc_b64 s[30:31] 13863; 13864; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_5: 13865; GFX940: ; %bb.0: 13866; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13867; GFX940-NEXT: ;;#ASMSTART 13868; GFX940-NEXT: ; def s[0:1] 13869; GFX940-NEXT: ;;#ASMEND 13870; GFX940-NEXT: s_lshr_b32 s2, s1, 16 13871; GFX940-NEXT: s_pack_ll_b32_b16 s8, s2, s1 13872; GFX940-NEXT: s_lshr_b32 s9, s0, 16 13873; GFX940-NEXT: ;;#ASMSTART 13874; GFX940-NEXT: ; use s[8:9] 13875; GFX940-NEXT: ;;#ASMEND 13876; GFX940-NEXT: s_setpc_b64 s[30:31] 13877 %vec0 = call <4 x half> asm "; def $0", "=s"() 13878 %vec1 = call <4 x half> asm "; def $0", "=s"() 13879 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 5> 13880 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13881 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13882 ret void 13883} 13884 13885define void @s_shuffle_v3f16_v4f16__u_6_6() { 13886; GFX9-LABEL: s_shuffle_v3f16_v4f16__u_6_6: 13887; GFX9: ; %bb.0: 13888; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13889; GFX9-NEXT: ;;#ASMSTART 13890; GFX9-NEXT: ; def s[8:9] 13891; GFX9-NEXT: ;;#ASMEND 13892; GFX9-NEXT: s_lshl_b32 s8, s9, 16 13893; GFX9-NEXT: ;;#ASMSTART 13894; GFX9-NEXT: ; use s[8:9] 13895; GFX9-NEXT: ;;#ASMEND 13896; GFX9-NEXT: s_setpc_b64 s[30:31] 13897 %vec0 = call <4 x half> asm "; def $0", "=s"() 13898 %vec1 = call <4 x half> asm "; def $0", "=s"() 13899 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 6, i32 6> 13900 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13901 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13902 ret void 13903} 13904 13905define void @s_shuffle_v3f16_v4f16__0_6_6() { 13906; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_6_6: 13907; GFX900: ; %bb.0: 13908; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13909; GFX900-NEXT: ;;#ASMSTART 13910; GFX900-NEXT: ; def s[8:9] 13911; GFX900-NEXT: ;;#ASMEND 13912; GFX900-NEXT: ;;#ASMSTART 13913; GFX900-NEXT: ; def s[4:5] 13914; GFX900-NEXT: ;;#ASMEND 13915; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13916; GFX900-NEXT: ;;#ASMSTART 13917; GFX900-NEXT: ; use s[8:9] 13918; GFX900-NEXT: ;;#ASMEND 13919; GFX900-NEXT: s_setpc_b64 s[30:31] 13920; 13921; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_6_6: 13922; GFX90A: ; %bb.0: 13923; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13924; GFX90A-NEXT: ;;#ASMSTART 13925; GFX90A-NEXT: ; def s[8:9] 13926; GFX90A-NEXT: ;;#ASMEND 13927; GFX90A-NEXT: ;;#ASMSTART 13928; GFX90A-NEXT: ; def s[4:5] 13929; GFX90A-NEXT: ;;#ASMEND 13930; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13931; GFX90A-NEXT: ;;#ASMSTART 13932; GFX90A-NEXT: ; use s[8:9] 13933; GFX90A-NEXT: ;;#ASMEND 13934; GFX90A-NEXT: s_setpc_b64 s[30:31] 13935; 13936; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_6_6: 13937; GFX940: ; %bb.0: 13938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13939; GFX940-NEXT: ;;#ASMSTART 13940; GFX940-NEXT: ; def s[8:9] 13941; GFX940-NEXT: ;;#ASMEND 13942; GFX940-NEXT: ;;#ASMSTART 13943; GFX940-NEXT: ; def s[0:1] 13944; GFX940-NEXT: ;;#ASMEND 13945; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 13946; GFX940-NEXT: ;;#ASMSTART 13947; GFX940-NEXT: ; use s[8:9] 13948; GFX940-NEXT: ;;#ASMEND 13949; GFX940-NEXT: s_setpc_b64 s[30:31] 13950 %vec0 = call <4 x half> asm "; def $0", "=s"() 13951 %vec1 = call <4 x half> asm "; def $0", "=s"() 13952 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 6, i32 6> 13953 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 13954 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 13955 ret void 13956} 13957 13958define void @s_shuffle_v3f16_v4f16__1_6_6() { 13959; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_6_6: 13960; GFX900: ; %bb.0: 13961; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13962; GFX900-NEXT: ;;#ASMSTART 13963; GFX900-NEXT: ; def s[4:5] 13964; GFX900-NEXT: ;;#ASMEND 13965; GFX900-NEXT: ;;#ASMSTART 13966; GFX900-NEXT: ; def s[8:9] 13967; GFX900-NEXT: ;;#ASMEND 13968; GFX900-NEXT: s_lshr_b32 s4, s4, 16 13969; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13970; GFX900-NEXT: ;;#ASMSTART 13971; GFX900-NEXT: ; use s[8:9] 13972; GFX900-NEXT: ;;#ASMEND 13973; GFX900-NEXT: s_setpc_b64 s[30:31] 13974; 13975; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_6_6: 13976; GFX90A: ; %bb.0: 13977; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13978; GFX90A-NEXT: ;;#ASMSTART 13979; GFX90A-NEXT: ; def s[4:5] 13980; GFX90A-NEXT: ;;#ASMEND 13981; GFX90A-NEXT: ;;#ASMSTART 13982; GFX90A-NEXT: ; def s[8:9] 13983; GFX90A-NEXT: ;;#ASMEND 13984; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 13985; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 13986; GFX90A-NEXT: ;;#ASMSTART 13987; GFX90A-NEXT: ; use s[8:9] 13988; GFX90A-NEXT: ;;#ASMEND 13989; GFX90A-NEXT: s_setpc_b64 s[30:31] 13990; 13991; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_6_6: 13992; GFX940: ; %bb.0: 13993; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 13994; GFX940-NEXT: ;;#ASMSTART 13995; GFX940-NEXT: ; def s[0:1] 13996; GFX940-NEXT: ;;#ASMEND 13997; GFX940-NEXT: ;;#ASMSTART 13998; GFX940-NEXT: ; def s[8:9] 13999; GFX940-NEXT: ;;#ASMEND 14000; GFX940-NEXT: s_lshr_b32 s0, s0, 16 14001; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14002; GFX940-NEXT: ;;#ASMSTART 14003; GFX940-NEXT: ; use s[8:9] 14004; GFX940-NEXT: ;;#ASMEND 14005; GFX940-NEXT: s_setpc_b64 s[30:31] 14006 %vec0 = call <4 x half> asm "; def $0", "=s"() 14007 %vec1 = call <4 x half> asm "; def $0", "=s"() 14008 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 6, i32 6> 14009 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14010 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14011 ret void 14012} 14013 14014define void @s_shuffle_v3f16_v4f16__2_6_6() { 14015; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_6_6: 14016; GFX900: ; %bb.0: 14017; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14018; GFX900-NEXT: ;;#ASMSTART 14019; GFX900-NEXT: ; def s[8:9] 14020; GFX900-NEXT: ;;#ASMEND 14021; GFX900-NEXT: ;;#ASMSTART 14022; GFX900-NEXT: ; def s[4:5] 14023; GFX900-NEXT: ;;#ASMEND 14024; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 14025; GFX900-NEXT: ;;#ASMSTART 14026; GFX900-NEXT: ; use s[8:9] 14027; GFX900-NEXT: ;;#ASMEND 14028; GFX900-NEXT: s_setpc_b64 s[30:31] 14029; 14030; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_6_6: 14031; GFX90A: ; %bb.0: 14032; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14033; GFX90A-NEXT: ;;#ASMSTART 14034; GFX90A-NEXT: ; def s[8:9] 14035; GFX90A-NEXT: ;;#ASMEND 14036; GFX90A-NEXT: ;;#ASMSTART 14037; GFX90A-NEXT: ; def s[4:5] 14038; GFX90A-NEXT: ;;#ASMEND 14039; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 14040; GFX90A-NEXT: ;;#ASMSTART 14041; GFX90A-NEXT: ; use s[8:9] 14042; GFX90A-NEXT: ;;#ASMEND 14043; GFX90A-NEXT: s_setpc_b64 s[30:31] 14044; 14045; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_6_6: 14046; GFX940: ; %bb.0: 14047; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14048; GFX940-NEXT: ;;#ASMSTART 14049; GFX940-NEXT: ; def s[8:9] 14050; GFX940-NEXT: ;;#ASMEND 14051; GFX940-NEXT: ;;#ASMSTART 14052; GFX940-NEXT: ; def s[0:1] 14053; GFX940-NEXT: ;;#ASMEND 14054; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 14055; GFX940-NEXT: ;;#ASMSTART 14056; GFX940-NEXT: ; use s[8:9] 14057; GFX940-NEXT: ;;#ASMEND 14058; GFX940-NEXT: s_setpc_b64 s[30:31] 14059 %vec0 = call <4 x half> asm "; def $0", "=s"() 14060 %vec1 = call <4 x half> asm "; def $0", "=s"() 14061 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 6, i32 6> 14062 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14063 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14064 ret void 14065} 14066 14067define void @s_shuffle_v3f16_v4f16__3_6_6() { 14068; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_6_6: 14069; GFX900: ; %bb.0: 14070; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14071; GFX900-NEXT: ;;#ASMSTART 14072; GFX900-NEXT: ; def s[4:5] 14073; GFX900-NEXT: ;;#ASMEND 14074; GFX900-NEXT: ;;#ASMSTART 14075; GFX900-NEXT: ; def s[8:9] 14076; GFX900-NEXT: ;;#ASMEND 14077; GFX900-NEXT: s_lshr_b32 s4, s5, 16 14078; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14079; GFX900-NEXT: ;;#ASMSTART 14080; GFX900-NEXT: ; use s[8:9] 14081; GFX900-NEXT: ;;#ASMEND 14082; GFX900-NEXT: s_setpc_b64 s[30:31] 14083; 14084; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_6_6: 14085; GFX90A: ; %bb.0: 14086; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14087; GFX90A-NEXT: ;;#ASMSTART 14088; GFX90A-NEXT: ; def s[4:5] 14089; GFX90A-NEXT: ;;#ASMEND 14090; GFX90A-NEXT: ;;#ASMSTART 14091; GFX90A-NEXT: ; def s[8:9] 14092; GFX90A-NEXT: ;;#ASMEND 14093; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 14094; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14095; GFX90A-NEXT: ;;#ASMSTART 14096; GFX90A-NEXT: ; use s[8:9] 14097; GFX90A-NEXT: ;;#ASMEND 14098; GFX90A-NEXT: s_setpc_b64 s[30:31] 14099; 14100; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_6_6: 14101; GFX940: ; %bb.0: 14102; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14103; GFX940-NEXT: ;;#ASMSTART 14104; GFX940-NEXT: ; def s[0:1] 14105; GFX940-NEXT: ;;#ASMEND 14106; GFX940-NEXT: ;;#ASMSTART 14107; GFX940-NEXT: ; def s[8:9] 14108; GFX940-NEXT: ;;#ASMEND 14109; GFX940-NEXT: s_lshr_b32 s0, s1, 16 14110; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14111; GFX940-NEXT: ;;#ASMSTART 14112; GFX940-NEXT: ; use s[8:9] 14113; GFX940-NEXT: ;;#ASMEND 14114; GFX940-NEXT: s_setpc_b64 s[30:31] 14115 %vec0 = call <4 x half> asm "; def $0", "=s"() 14116 %vec1 = call <4 x half> asm "; def $0", "=s"() 14117 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 6, i32 6> 14118 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14119 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14120 ret void 14121} 14122 14123define void @s_shuffle_v3f16_v4f16__4_6_6() { 14124; GFX9-LABEL: s_shuffle_v3f16_v4f16__4_6_6: 14125; GFX9: ; %bb.0: 14126; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14127; GFX9-NEXT: ;;#ASMSTART 14128; GFX9-NEXT: ; def s[8:9] 14129; GFX9-NEXT: ;;#ASMEND 14130; GFX9-NEXT: s_pack_ll_b32_b16 s8, s8, s9 14131; GFX9-NEXT: ;;#ASMSTART 14132; GFX9-NEXT: ; use s[8:9] 14133; GFX9-NEXT: ;;#ASMEND 14134; GFX9-NEXT: s_setpc_b64 s[30:31] 14135 %vec0 = call <4 x half> asm "; def $0", "=s"() 14136 %vec1 = call <4 x half> asm "; def $0", "=s"() 14137 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 6, i32 6> 14138 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14139 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14140 ret void 14141} 14142 14143define void @s_shuffle_v3f16_v4f16__5_6_6() { 14144; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_6_6: 14145; GFX900: ; %bb.0: 14146; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14147; GFX900-NEXT: ;;#ASMSTART 14148; GFX900-NEXT: ; def s[8:9] 14149; GFX900-NEXT: ;;#ASMEND 14150; GFX900-NEXT: s_lshr_b32 s4, s8, 16 14151; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14152; GFX900-NEXT: ;;#ASMSTART 14153; GFX900-NEXT: ; use s[8:9] 14154; GFX900-NEXT: ;;#ASMEND 14155; GFX900-NEXT: s_setpc_b64 s[30:31] 14156; 14157; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_6_6: 14158; GFX90A: ; %bb.0: 14159; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14160; GFX90A-NEXT: ;;#ASMSTART 14161; GFX90A-NEXT: ; def s[8:9] 14162; GFX90A-NEXT: ;;#ASMEND 14163; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 14164; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14165; GFX90A-NEXT: ;;#ASMSTART 14166; GFX90A-NEXT: ; use s[8:9] 14167; GFX90A-NEXT: ;;#ASMEND 14168; GFX90A-NEXT: s_setpc_b64 s[30:31] 14169; 14170; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_6_6: 14171; GFX940: ; %bb.0: 14172; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14173; GFX940-NEXT: ;;#ASMSTART 14174; GFX940-NEXT: ; def s[8:9] 14175; GFX940-NEXT: ;;#ASMEND 14176; GFX940-NEXT: s_lshr_b32 s0, s8, 16 14177; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14178; GFX940-NEXT: ;;#ASMSTART 14179; GFX940-NEXT: ; use s[8:9] 14180; GFX940-NEXT: ;;#ASMEND 14181; GFX940-NEXT: s_setpc_b64 s[30:31] 14182 %vec0 = call <4 x half> asm "; def $0", "=s"() 14183 %vec1 = call <4 x half> asm "; def $0", "=s"() 14184 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 6, i32 6> 14185 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14186 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14187 ret void 14188} 14189 14190define void @s_shuffle_v3f16_v4f16__6_6_6() { 14191; GFX9-LABEL: s_shuffle_v3f16_v4f16__6_6_6: 14192; GFX9: ; %bb.0: 14193; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14194; GFX9-NEXT: ;;#ASMSTART 14195; GFX9-NEXT: ; def s[8:9] 14196; GFX9-NEXT: ;;#ASMEND 14197; GFX9-NEXT: s_pack_ll_b32_b16 s8, s9, s9 14198; GFX9-NEXT: ;;#ASMSTART 14199; GFX9-NEXT: ; use s[8:9] 14200; GFX9-NEXT: ;;#ASMEND 14201; GFX9-NEXT: s_setpc_b64 s[30:31] 14202 %vec0 = call <4 x half> asm "; def $0", "=s"() 14203 %vec1 = call <4 x half> asm "; def $0", "=s"() 14204 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 6, i32 6> 14205 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14206 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14207 ret void 14208} 14209 14210define void @s_shuffle_v3f16_v4f16__7_6_6() { 14211; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_6: 14212; GFX900: ; %bb.0: 14213; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14214; GFX900-NEXT: ;;#ASMSTART 14215; GFX900-NEXT: ; def s[8:9] 14216; GFX900-NEXT: ;;#ASMEND 14217; GFX900-NEXT: s_lshr_b32 s4, s9, 16 14218; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14219; GFX900-NEXT: ;;#ASMSTART 14220; GFX900-NEXT: ; use s[8:9] 14221; GFX900-NEXT: ;;#ASMEND 14222; GFX900-NEXT: s_setpc_b64 s[30:31] 14223; 14224; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_6: 14225; GFX90A: ; %bb.0: 14226; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14227; GFX90A-NEXT: ;;#ASMSTART 14228; GFX90A-NEXT: ; def s[8:9] 14229; GFX90A-NEXT: ;;#ASMEND 14230; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14231; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14232; GFX90A-NEXT: ;;#ASMSTART 14233; GFX90A-NEXT: ; use s[8:9] 14234; GFX90A-NEXT: ;;#ASMEND 14235; GFX90A-NEXT: s_setpc_b64 s[30:31] 14236; 14237; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_6: 14238; GFX940: ; %bb.0: 14239; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14240; GFX940-NEXT: ;;#ASMSTART 14241; GFX940-NEXT: ; def s[8:9] 14242; GFX940-NEXT: ;;#ASMEND 14243; GFX940-NEXT: s_lshr_b32 s0, s9, 16 14244; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14245; GFX940-NEXT: ;;#ASMSTART 14246; GFX940-NEXT: ; use s[8:9] 14247; GFX940-NEXT: ;;#ASMEND 14248; GFX940-NEXT: s_setpc_b64 s[30:31] 14249 %vec0 = call <4 x half> asm "; def $0", "=s"() 14250 %vec1 = call <4 x half> asm "; def $0", "=s"() 14251 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 6> 14252 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14253 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14254 ret void 14255} 14256 14257define void @s_shuffle_v3f16_v4f16__7_u_6() { 14258; GFX9-LABEL: s_shuffle_v3f16_v4f16__7_u_6: 14259; GFX9: ; %bb.0: 14260; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14261; GFX9-NEXT: ;;#ASMSTART 14262; GFX9-NEXT: ; def s[8:9] 14263; GFX9-NEXT: ;;#ASMEND 14264; GFX9-NEXT: s_lshr_b32 s8, s9, 16 14265; GFX9-NEXT: ;;#ASMSTART 14266; GFX9-NEXT: ; use s[8:9] 14267; GFX9-NEXT: ;;#ASMEND 14268; GFX9-NEXT: s_setpc_b64 s[30:31] 14269 %vec0 = call <4 x half> asm "; def $0", "=s"() 14270 %vec1 = call <4 x half> asm "; def $0", "=s"() 14271 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 6> 14272 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14273 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14274 ret void 14275} 14276 14277define void @s_shuffle_v3f16_v4f16__7_0_6() { 14278; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_6: 14279; GFX900: ; %bb.0: 14280; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14281; GFX900-NEXT: ;;#ASMSTART 14282; GFX900-NEXT: ; def s[4:5] 14283; GFX900-NEXT: ;;#ASMEND 14284; GFX900-NEXT: ;;#ASMSTART 14285; GFX900-NEXT: ; def s[8:9] 14286; GFX900-NEXT: ;;#ASMEND 14287; GFX900-NEXT: s_lshr_b32 s5, s9, 16 14288; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14289; GFX900-NEXT: ;;#ASMSTART 14290; GFX900-NEXT: ; use s[8:9] 14291; GFX900-NEXT: ;;#ASMEND 14292; GFX900-NEXT: s_setpc_b64 s[30:31] 14293; 14294; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_6: 14295; GFX90A: ; %bb.0: 14296; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14297; GFX90A-NEXT: ;;#ASMSTART 14298; GFX90A-NEXT: ; def s[4:5] 14299; GFX90A-NEXT: ;;#ASMEND 14300; GFX90A-NEXT: ;;#ASMSTART 14301; GFX90A-NEXT: ; def s[8:9] 14302; GFX90A-NEXT: ;;#ASMEND 14303; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 14304; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14305; GFX90A-NEXT: ;;#ASMSTART 14306; GFX90A-NEXT: ; use s[8:9] 14307; GFX90A-NEXT: ;;#ASMEND 14308; GFX90A-NEXT: s_setpc_b64 s[30:31] 14309; 14310; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_6: 14311; GFX940: ; %bb.0: 14312; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14313; GFX940-NEXT: ;;#ASMSTART 14314; GFX940-NEXT: ; def s[0:1] 14315; GFX940-NEXT: ;;#ASMEND 14316; GFX940-NEXT: ;;#ASMSTART 14317; GFX940-NEXT: ; def s[8:9] 14318; GFX940-NEXT: ;;#ASMEND 14319; GFX940-NEXT: s_lshr_b32 s1, s9, 16 14320; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14321; GFX940-NEXT: ;;#ASMSTART 14322; GFX940-NEXT: ; use s[8:9] 14323; GFX940-NEXT: ;;#ASMEND 14324; GFX940-NEXT: s_setpc_b64 s[30:31] 14325 %vec0 = call <4 x half> asm "; def $0", "=s"() 14326 %vec1 = call <4 x half> asm "; def $0", "=s"() 14327 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 6> 14328 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14329 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14330 ret void 14331} 14332 14333define void @s_shuffle_v3f16_v4f16__7_1_6() { 14334; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_6: 14335; GFX900: ; %bb.0: 14336; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14337; GFX900-NEXT: ;;#ASMSTART 14338; GFX900-NEXT: ; def s[4:5] 14339; GFX900-NEXT: ;;#ASMEND 14340; GFX900-NEXT: ;;#ASMSTART 14341; GFX900-NEXT: ; def s[8:9] 14342; GFX900-NEXT: ;;#ASMEND 14343; GFX900-NEXT: s_lshr_b32 s4, s4, 16 14344; GFX900-NEXT: s_lshr_b32 s5, s9, 16 14345; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14346; GFX900-NEXT: ;;#ASMSTART 14347; GFX900-NEXT: ; use s[8:9] 14348; GFX900-NEXT: ;;#ASMEND 14349; GFX900-NEXT: s_setpc_b64 s[30:31] 14350; 14351; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_6: 14352; GFX90A: ; %bb.0: 14353; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14354; GFX90A-NEXT: ;;#ASMSTART 14355; GFX90A-NEXT: ; def s[4:5] 14356; GFX90A-NEXT: ;;#ASMEND 14357; GFX90A-NEXT: ;;#ASMSTART 14358; GFX90A-NEXT: ; def s[8:9] 14359; GFX90A-NEXT: ;;#ASMEND 14360; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 14361; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 14362; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14363; GFX90A-NEXT: ;;#ASMSTART 14364; GFX90A-NEXT: ; use s[8:9] 14365; GFX90A-NEXT: ;;#ASMEND 14366; GFX90A-NEXT: s_setpc_b64 s[30:31] 14367; 14368; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_6: 14369; GFX940: ; %bb.0: 14370; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14371; GFX940-NEXT: ;;#ASMSTART 14372; GFX940-NEXT: ; def s[0:1] 14373; GFX940-NEXT: ;;#ASMEND 14374; GFX940-NEXT: ;;#ASMSTART 14375; GFX940-NEXT: ; def s[8:9] 14376; GFX940-NEXT: ;;#ASMEND 14377; GFX940-NEXT: s_lshr_b32 s0, s0, 16 14378; GFX940-NEXT: s_lshr_b32 s1, s9, 16 14379; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14380; GFX940-NEXT: ;;#ASMSTART 14381; GFX940-NEXT: ; use s[8:9] 14382; GFX940-NEXT: ;;#ASMEND 14383; GFX940-NEXT: s_setpc_b64 s[30:31] 14384 %vec0 = call <4 x half> asm "; def $0", "=s"() 14385 %vec1 = call <4 x half> asm "; def $0", "=s"() 14386 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 6> 14387 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14388 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14389 ret void 14390} 14391 14392define void @s_shuffle_v3f16_v4f16__7_2_6() { 14393; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_6: 14394; GFX900: ; %bb.0: 14395; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14396; GFX900-NEXT: ;;#ASMSTART 14397; GFX900-NEXT: ; def s[4:5] 14398; GFX900-NEXT: ;;#ASMEND 14399; GFX900-NEXT: ;;#ASMSTART 14400; GFX900-NEXT: ; def s[8:9] 14401; GFX900-NEXT: ;;#ASMEND 14402; GFX900-NEXT: s_lshr_b32 s4, s9, 16 14403; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14404; GFX900-NEXT: ;;#ASMSTART 14405; GFX900-NEXT: ; use s[8:9] 14406; GFX900-NEXT: ;;#ASMEND 14407; GFX900-NEXT: s_setpc_b64 s[30:31] 14408; 14409; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_6: 14410; GFX90A: ; %bb.0: 14411; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14412; GFX90A-NEXT: ;;#ASMSTART 14413; GFX90A-NEXT: ; def s[4:5] 14414; GFX90A-NEXT: ;;#ASMEND 14415; GFX90A-NEXT: ;;#ASMSTART 14416; GFX90A-NEXT: ; def s[8:9] 14417; GFX90A-NEXT: ;;#ASMEND 14418; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14419; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 14420; GFX90A-NEXT: ;;#ASMSTART 14421; GFX90A-NEXT: ; use s[8:9] 14422; GFX90A-NEXT: ;;#ASMEND 14423; GFX90A-NEXT: s_setpc_b64 s[30:31] 14424; 14425; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_6: 14426; GFX940: ; %bb.0: 14427; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14428; GFX940-NEXT: ;;#ASMSTART 14429; GFX940-NEXT: ; def s[0:1] 14430; GFX940-NEXT: ;;#ASMEND 14431; GFX940-NEXT: ;;#ASMSTART 14432; GFX940-NEXT: ; def s[8:9] 14433; GFX940-NEXT: ;;#ASMEND 14434; GFX940-NEXT: s_lshr_b32 s0, s9, 16 14435; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 14436; GFX940-NEXT: ;;#ASMSTART 14437; GFX940-NEXT: ; use s[8:9] 14438; GFX940-NEXT: ;;#ASMEND 14439; GFX940-NEXT: s_setpc_b64 s[30:31] 14440 %vec0 = call <4 x half> asm "; def $0", "=s"() 14441 %vec1 = call <4 x half> asm "; def $0", "=s"() 14442 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 6> 14443 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14444 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14445 ret void 14446} 14447 14448define void @s_shuffle_v3f16_v4f16__7_3_6() { 14449; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_6: 14450; GFX900: ; %bb.0: 14451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14452; GFX900-NEXT: ;;#ASMSTART 14453; GFX900-NEXT: ; def s[4:5] 14454; GFX900-NEXT: ;;#ASMEND 14455; GFX900-NEXT: ;;#ASMSTART 14456; GFX900-NEXT: ; def s[8:9] 14457; GFX900-NEXT: ;;#ASMEND 14458; GFX900-NEXT: s_lshr_b32 s4, s5, 16 14459; GFX900-NEXT: s_lshr_b32 s5, s9, 16 14460; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14461; GFX900-NEXT: ;;#ASMSTART 14462; GFX900-NEXT: ; use s[8:9] 14463; GFX900-NEXT: ;;#ASMEND 14464; GFX900-NEXT: s_setpc_b64 s[30:31] 14465; 14466; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_6: 14467; GFX90A: ; %bb.0: 14468; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14469; GFX90A-NEXT: ;;#ASMSTART 14470; GFX90A-NEXT: ; def s[4:5] 14471; GFX90A-NEXT: ;;#ASMEND 14472; GFX90A-NEXT: ;;#ASMSTART 14473; GFX90A-NEXT: ; def s[8:9] 14474; GFX90A-NEXT: ;;#ASMEND 14475; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 14476; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 14477; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14478; GFX90A-NEXT: ;;#ASMSTART 14479; GFX90A-NEXT: ; use s[8:9] 14480; GFX90A-NEXT: ;;#ASMEND 14481; GFX90A-NEXT: s_setpc_b64 s[30:31] 14482; 14483; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_6: 14484; GFX940: ; %bb.0: 14485; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14486; GFX940-NEXT: ;;#ASMSTART 14487; GFX940-NEXT: ; def s[0:1] 14488; GFX940-NEXT: ;;#ASMEND 14489; GFX940-NEXT: ;;#ASMSTART 14490; GFX940-NEXT: ; def s[8:9] 14491; GFX940-NEXT: ;;#ASMEND 14492; GFX940-NEXT: s_lshr_b32 s0, s1, 16 14493; GFX940-NEXT: s_lshr_b32 s1, s9, 16 14494; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14495; GFX940-NEXT: ;;#ASMSTART 14496; GFX940-NEXT: ; use s[8:9] 14497; GFX940-NEXT: ;;#ASMEND 14498; GFX940-NEXT: s_setpc_b64 s[30:31] 14499 %vec0 = call <4 x half> asm "; def $0", "=s"() 14500 %vec1 = call <4 x half> asm "; def $0", "=s"() 14501 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 6> 14502 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14503 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14504 ret void 14505} 14506 14507define void @s_shuffle_v3f16_v4f16__7_4_6() { 14508; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_6: 14509; GFX900: ; %bb.0: 14510; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14511; GFX900-NEXT: ;;#ASMSTART 14512; GFX900-NEXT: ; def s[8:9] 14513; GFX900-NEXT: ;;#ASMEND 14514; GFX900-NEXT: s_lshr_b32 s4, s9, 16 14515; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s8 14516; GFX900-NEXT: ;;#ASMSTART 14517; GFX900-NEXT: ; use s[8:9] 14518; GFX900-NEXT: ;;#ASMEND 14519; GFX900-NEXT: s_setpc_b64 s[30:31] 14520; 14521; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_6: 14522; GFX90A: ; %bb.0: 14523; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14524; GFX90A-NEXT: ;;#ASMSTART 14525; GFX90A-NEXT: ; def s[8:9] 14526; GFX90A-NEXT: ;;#ASMEND 14527; GFX90A-NEXT: s_lshr_b32 s4, s9, 16 14528; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s8 14529; GFX90A-NEXT: ;;#ASMSTART 14530; GFX90A-NEXT: ; use s[8:9] 14531; GFX90A-NEXT: ;;#ASMEND 14532; GFX90A-NEXT: s_setpc_b64 s[30:31] 14533; 14534; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_6: 14535; GFX940: ; %bb.0: 14536; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14537; GFX940-NEXT: ;;#ASMSTART 14538; GFX940-NEXT: ; def s[8:9] 14539; GFX940-NEXT: ;;#ASMEND 14540; GFX940-NEXT: s_lshr_b32 s0, s9, 16 14541; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s8 14542; GFX940-NEXT: ;;#ASMSTART 14543; GFX940-NEXT: ; use s[8:9] 14544; GFX940-NEXT: ;;#ASMEND 14545; GFX940-NEXT: s_setpc_b64 s[30:31] 14546 %vec0 = call <4 x half> asm "; def $0", "=s"() 14547 %vec1 = call <4 x half> asm "; def $0", "=s"() 14548 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 6> 14549 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14550 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14551 ret void 14552} 14553 14554define void @s_shuffle_v3f16_v4f16__7_5_6() { 14555; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_6: 14556; GFX900: ; %bb.0: 14557; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14558; GFX900-NEXT: ;;#ASMSTART 14559; GFX900-NEXT: ; def s[8:9] 14560; GFX900-NEXT: ;;#ASMEND 14561; GFX900-NEXT: s_lshr_b32 s4, s8, 16 14562; GFX900-NEXT: s_lshr_b32 s5, s9, 16 14563; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14564; GFX900-NEXT: ;;#ASMSTART 14565; GFX900-NEXT: ; use s[8:9] 14566; GFX900-NEXT: ;;#ASMEND 14567; GFX900-NEXT: s_setpc_b64 s[30:31] 14568; 14569; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_6: 14570; GFX90A: ; %bb.0: 14571; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14572; GFX90A-NEXT: ;;#ASMSTART 14573; GFX90A-NEXT: ; def s[8:9] 14574; GFX90A-NEXT: ;;#ASMEND 14575; GFX90A-NEXT: s_lshr_b32 s4, s8, 16 14576; GFX90A-NEXT: s_lshr_b32 s5, s9, 16 14577; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 14578; GFX90A-NEXT: ;;#ASMSTART 14579; GFX90A-NEXT: ; use s[8:9] 14580; GFX90A-NEXT: ;;#ASMEND 14581; GFX90A-NEXT: s_setpc_b64 s[30:31] 14582; 14583; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_6: 14584; GFX940: ; %bb.0: 14585; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14586; GFX940-NEXT: ;;#ASMSTART 14587; GFX940-NEXT: ; def s[8:9] 14588; GFX940-NEXT: ;;#ASMEND 14589; GFX940-NEXT: s_lshr_b32 s0, s8, 16 14590; GFX940-NEXT: s_lshr_b32 s1, s9, 16 14591; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 14592; GFX940-NEXT: ;;#ASMSTART 14593; GFX940-NEXT: ; use s[8:9] 14594; GFX940-NEXT: ;;#ASMEND 14595; GFX940-NEXT: s_setpc_b64 s[30:31] 14596 %vec0 = call <4 x half> asm "; def $0", "=s"() 14597 %vec1 = call <4 x half> asm "; def $0", "=s"() 14598 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 6> 14599 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14600 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14601 ret void 14602} 14603 14604define void @s_shuffle_v3f16_v4f16__u_7_7() { 14605; GFX900-LABEL: s_shuffle_v3f16_v4f16__u_7_7: 14606; GFX900: ; %bb.0: 14607; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14608; GFX900-NEXT: ;;#ASMSTART 14609; GFX900-NEXT: ; def s[4:5] 14610; GFX900-NEXT: ;;#ASMEND 14611; GFX900-NEXT: s_lshr_b32 s9, s5, 16 14612; GFX900-NEXT: s_mov_b32 s8, s5 14613; GFX900-NEXT: ;;#ASMSTART 14614; GFX900-NEXT: ; use s[8:9] 14615; GFX900-NEXT: ;;#ASMEND 14616; GFX900-NEXT: s_setpc_b64 s[30:31] 14617; 14618; GFX90A-LABEL: s_shuffle_v3f16_v4f16__u_7_7: 14619; GFX90A: ; %bb.0: 14620; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14621; GFX90A-NEXT: ;;#ASMSTART 14622; GFX90A-NEXT: ; def s[4:5] 14623; GFX90A-NEXT: ;;#ASMEND 14624; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 14625; GFX90A-NEXT: s_mov_b32 s8, s5 14626; GFX90A-NEXT: ;;#ASMSTART 14627; GFX90A-NEXT: ; use s[8:9] 14628; GFX90A-NEXT: ;;#ASMEND 14629; GFX90A-NEXT: s_setpc_b64 s[30:31] 14630; 14631; GFX940-LABEL: s_shuffle_v3f16_v4f16__u_7_7: 14632; GFX940: ; %bb.0: 14633; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14634; GFX940-NEXT: ;;#ASMSTART 14635; GFX940-NEXT: ; def s[0:1] 14636; GFX940-NEXT: ;;#ASMEND 14637; GFX940-NEXT: s_lshr_b32 s9, s1, 16 14638; GFX940-NEXT: s_mov_b32 s8, s1 14639; GFX940-NEXT: ;;#ASMSTART 14640; GFX940-NEXT: ; use s[8:9] 14641; GFX940-NEXT: ;;#ASMEND 14642; GFX940-NEXT: s_setpc_b64 s[30:31] 14643 %vec0 = call <4 x half> asm "; def $0", "=s"() 14644 %vec1 = call <4 x half> asm "; def $0", "=s"() 14645 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 poison, i32 7, i32 7> 14646 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14647 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14648 ret void 14649} 14650 14651define void @s_shuffle_v3f16_v4f16__0_7_7() { 14652; GFX900-LABEL: s_shuffle_v3f16_v4f16__0_7_7: 14653; GFX900: ; %bb.0: 14654; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14655; GFX900-NEXT: ;;#ASMSTART 14656; GFX900-NEXT: ; def s[6:7] 14657; GFX900-NEXT: ;;#ASMEND 14658; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14659; GFX900-NEXT: ;;#ASMSTART 14660; GFX900-NEXT: ; def s[4:5] 14661; GFX900-NEXT: ;;#ASMEND 14662; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14663; GFX900-NEXT: ;;#ASMSTART 14664; GFX900-NEXT: ; use s[8:9] 14665; GFX900-NEXT: ;;#ASMEND 14666; GFX900-NEXT: s_setpc_b64 s[30:31] 14667; 14668; GFX90A-LABEL: s_shuffle_v3f16_v4f16__0_7_7: 14669; GFX90A: ; %bb.0: 14670; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14671; GFX90A-NEXT: ;;#ASMSTART 14672; GFX90A-NEXT: ; def s[6:7] 14673; GFX90A-NEXT: ;;#ASMEND 14674; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14675; GFX90A-NEXT: ;;#ASMSTART 14676; GFX90A-NEXT: ; def s[4:5] 14677; GFX90A-NEXT: ;;#ASMEND 14678; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14679; GFX90A-NEXT: ;;#ASMSTART 14680; GFX90A-NEXT: ; use s[8:9] 14681; GFX90A-NEXT: ;;#ASMEND 14682; GFX90A-NEXT: s_setpc_b64 s[30:31] 14683; 14684; GFX940-LABEL: s_shuffle_v3f16_v4f16__0_7_7: 14685; GFX940: ; %bb.0: 14686; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14687; GFX940-NEXT: ;;#ASMSTART 14688; GFX940-NEXT: ; def s[2:3] 14689; GFX940-NEXT: ;;#ASMEND 14690; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14691; GFX940-NEXT: ;;#ASMSTART 14692; GFX940-NEXT: ; def s[0:1] 14693; GFX940-NEXT: ;;#ASMEND 14694; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14695; GFX940-NEXT: ;;#ASMSTART 14696; GFX940-NEXT: ; use s[8:9] 14697; GFX940-NEXT: ;;#ASMEND 14698; GFX940-NEXT: s_setpc_b64 s[30:31] 14699 %vec0 = call <4 x half> asm "; def $0", "=s"() 14700 %vec1 = call <4 x half> asm "; def $0", "=s"() 14701 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 0, i32 7, i32 7> 14702 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14703 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14704 ret void 14705} 14706 14707define void @s_shuffle_v3f16_v4f16__1_7_7() { 14708; GFX900-LABEL: s_shuffle_v3f16_v4f16__1_7_7: 14709; GFX900: ; %bb.0: 14710; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14711; GFX900-NEXT: ;;#ASMSTART 14712; GFX900-NEXT: ; def s[4:5] 14713; GFX900-NEXT: ;;#ASMEND 14714; GFX900-NEXT: ;;#ASMSTART 14715; GFX900-NEXT: ; def s[6:7] 14716; GFX900-NEXT: ;;#ASMEND 14717; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14718; GFX900-NEXT: s_lshr_b32 s4, s4, 16 14719; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14720; GFX900-NEXT: ;;#ASMSTART 14721; GFX900-NEXT: ; use s[8:9] 14722; GFX900-NEXT: ;;#ASMEND 14723; GFX900-NEXT: s_setpc_b64 s[30:31] 14724; 14725; GFX90A-LABEL: s_shuffle_v3f16_v4f16__1_7_7: 14726; GFX90A: ; %bb.0: 14727; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14728; GFX90A-NEXT: ;;#ASMSTART 14729; GFX90A-NEXT: ; def s[4:5] 14730; GFX90A-NEXT: ;;#ASMEND 14731; GFX90A-NEXT: ;;#ASMSTART 14732; GFX90A-NEXT: ; def s[6:7] 14733; GFX90A-NEXT: ;;#ASMEND 14734; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14735; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 14736; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14737; GFX90A-NEXT: ;;#ASMSTART 14738; GFX90A-NEXT: ; use s[8:9] 14739; GFX90A-NEXT: ;;#ASMEND 14740; GFX90A-NEXT: s_setpc_b64 s[30:31] 14741; 14742; GFX940-LABEL: s_shuffle_v3f16_v4f16__1_7_7: 14743; GFX940: ; %bb.0: 14744; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14745; GFX940-NEXT: ;;#ASMSTART 14746; GFX940-NEXT: ; def s[0:1] 14747; GFX940-NEXT: ;;#ASMEND 14748; GFX940-NEXT: ;;#ASMSTART 14749; GFX940-NEXT: ; def s[2:3] 14750; GFX940-NEXT: ;;#ASMEND 14751; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14752; GFX940-NEXT: s_lshr_b32 s0, s0, 16 14753; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14754; GFX940-NEXT: ;;#ASMSTART 14755; GFX940-NEXT: ; use s[8:9] 14756; GFX940-NEXT: ;;#ASMEND 14757; GFX940-NEXT: s_setpc_b64 s[30:31] 14758 %vec0 = call <4 x half> asm "; def $0", "=s"() 14759 %vec1 = call <4 x half> asm "; def $0", "=s"() 14760 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 1, i32 7, i32 7> 14761 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14762 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14763 ret void 14764} 14765 14766define void @s_shuffle_v3f16_v4f16__2_7_7() { 14767; GFX900-LABEL: s_shuffle_v3f16_v4f16__2_7_7: 14768; GFX900: ; %bb.0: 14769; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14770; GFX900-NEXT: ;;#ASMSTART 14771; GFX900-NEXT: ; def s[6:7] 14772; GFX900-NEXT: ;;#ASMEND 14773; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14774; GFX900-NEXT: ;;#ASMSTART 14775; GFX900-NEXT: ; def s[4:5] 14776; GFX900-NEXT: ;;#ASMEND 14777; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s9 14778; GFX900-NEXT: ;;#ASMSTART 14779; GFX900-NEXT: ; use s[8:9] 14780; GFX900-NEXT: ;;#ASMEND 14781; GFX900-NEXT: s_setpc_b64 s[30:31] 14782; 14783; GFX90A-LABEL: s_shuffle_v3f16_v4f16__2_7_7: 14784; GFX90A: ; %bb.0: 14785; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14786; GFX90A-NEXT: ;;#ASMSTART 14787; GFX90A-NEXT: ; def s[6:7] 14788; GFX90A-NEXT: ;;#ASMEND 14789; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14790; GFX90A-NEXT: ;;#ASMSTART 14791; GFX90A-NEXT: ; def s[4:5] 14792; GFX90A-NEXT: ;;#ASMEND 14793; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s9 14794; GFX90A-NEXT: ;;#ASMSTART 14795; GFX90A-NEXT: ; use s[8:9] 14796; GFX90A-NEXT: ;;#ASMEND 14797; GFX90A-NEXT: s_setpc_b64 s[30:31] 14798; 14799; GFX940-LABEL: s_shuffle_v3f16_v4f16__2_7_7: 14800; GFX940: ; %bb.0: 14801; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14802; GFX940-NEXT: ;;#ASMSTART 14803; GFX940-NEXT: ; def s[2:3] 14804; GFX940-NEXT: ;;#ASMEND 14805; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14806; GFX940-NEXT: ;;#ASMSTART 14807; GFX940-NEXT: ; def s[0:1] 14808; GFX940-NEXT: ;;#ASMEND 14809; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s9 14810; GFX940-NEXT: ;;#ASMSTART 14811; GFX940-NEXT: ; use s[8:9] 14812; GFX940-NEXT: ;;#ASMEND 14813; GFX940-NEXT: s_setpc_b64 s[30:31] 14814 %vec0 = call <4 x half> asm "; def $0", "=s"() 14815 %vec1 = call <4 x half> asm "; def $0", "=s"() 14816 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 2, i32 7, i32 7> 14817 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14818 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14819 ret void 14820} 14821 14822define void @s_shuffle_v3f16_v4f16__3_7_7() { 14823; GFX900-LABEL: s_shuffle_v3f16_v4f16__3_7_7: 14824; GFX900: ; %bb.0: 14825; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14826; GFX900-NEXT: ;;#ASMSTART 14827; GFX900-NEXT: ; def s[4:5] 14828; GFX900-NEXT: ;;#ASMEND 14829; GFX900-NEXT: ;;#ASMSTART 14830; GFX900-NEXT: ; def s[6:7] 14831; GFX900-NEXT: ;;#ASMEND 14832; GFX900-NEXT: s_lshr_b32 s9, s7, 16 14833; GFX900-NEXT: s_lshr_b32 s4, s5, 16 14834; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14835; GFX900-NEXT: ;;#ASMSTART 14836; GFX900-NEXT: ; use s[8:9] 14837; GFX900-NEXT: ;;#ASMEND 14838; GFX900-NEXT: s_setpc_b64 s[30:31] 14839; 14840; GFX90A-LABEL: s_shuffle_v3f16_v4f16__3_7_7: 14841; GFX90A: ; %bb.0: 14842; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14843; GFX90A-NEXT: ;;#ASMSTART 14844; GFX90A-NEXT: ; def s[4:5] 14845; GFX90A-NEXT: ;;#ASMEND 14846; GFX90A-NEXT: ;;#ASMSTART 14847; GFX90A-NEXT: ; def s[6:7] 14848; GFX90A-NEXT: ;;#ASMEND 14849; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 14850; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 14851; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14852; GFX90A-NEXT: ;;#ASMSTART 14853; GFX90A-NEXT: ; use s[8:9] 14854; GFX90A-NEXT: ;;#ASMEND 14855; GFX90A-NEXT: s_setpc_b64 s[30:31] 14856; 14857; GFX940-LABEL: s_shuffle_v3f16_v4f16__3_7_7: 14858; GFX940: ; %bb.0: 14859; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14860; GFX940-NEXT: ;;#ASMSTART 14861; GFX940-NEXT: ; def s[0:1] 14862; GFX940-NEXT: ;;#ASMEND 14863; GFX940-NEXT: ;;#ASMSTART 14864; GFX940-NEXT: ; def s[2:3] 14865; GFX940-NEXT: ;;#ASMEND 14866; GFX940-NEXT: s_lshr_b32 s9, s3, 16 14867; GFX940-NEXT: s_lshr_b32 s0, s1, 16 14868; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14869; GFX940-NEXT: ;;#ASMSTART 14870; GFX940-NEXT: ; use s[8:9] 14871; GFX940-NEXT: ;;#ASMEND 14872; GFX940-NEXT: s_setpc_b64 s[30:31] 14873 %vec0 = call <4 x half> asm "; def $0", "=s"() 14874 %vec1 = call <4 x half> asm "; def $0", "=s"() 14875 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 3, i32 7, i32 7> 14876 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14877 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14878 ret void 14879} 14880 14881define void @s_shuffle_v3f16_v4f16__4_7_7() { 14882; GFX900-LABEL: s_shuffle_v3f16_v4f16__4_7_7: 14883; GFX900: ; %bb.0: 14884; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14885; GFX900-NEXT: ;;#ASMSTART 14886; GFX900-NEXT: ; def s[4:5] 14887; GFX900-NEXT: ;;#ASMEND 14888; GFX900-NEXT: s_lshr_b32 s9, s5, 16 14889; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14890; GFX900-NEXT: ;;#ASMSTART 14891; GFX900-NEXT: ; use s[8:9] 14892; GFX900-NEXT: ;;#ASMEND 14893; GFX900-NEXT: s_setpc_b64 s[30:31] 14894; 14895; GFX90A-LABEL: s_shuffle_v3f16_v4f16__4_7_7: 14896; GFX90A: ; %bb.0: 14897; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14898; GFX90A-NEXT: ;;#ASMSTART 14899; GFX90A-NEXT: ; def s[4:5] 14900; GFX90A-NEXT: ;;#ASMEND 14901; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 14902; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14903; GFX90A-NEXT: ;;#ASMSTART 14904; GFX90A-NEXT: ; use s[8:9] 14905; GFX90A-NEXT: ;;#ASMEND 14906; GFX90A-NEXT: s_setpc_b64 s[30:31] 14907; 14908; GFX940-LABEL: s_shuffle_v3f16_v4f16__4_7_7: 14909; GFX940: ; %bb.0: 14910; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14911; GFX940-NEXT: ;;#ASMSTART 14912; GFX940-NEXT: ; def s[0:1] 14913; GFX940-NEXT: ;;#ASMEND 14914; GFX940-NEXT: s_lshr_b32 s9, s1, 16 14915; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14916; GFX940-NEXT: ;;#ASMSTART 14917; GFX940-NEXT: ; use s[8:9] 14918; GFX940-NEXT: ;;#ASMEND 14919; GFX940-NEXT: s_setpc_b64 s[30:31] 14920 %vec0 = call <4 x half> asm "; def $0", "=s"() 14921 %vec1 = call <4 x half> asm "; def $0", "=s"() 14922 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 4, i32 7, i32 7> 14923 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14924 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14925 ret void 14926} 14927 14928define void @s_shuffle_v3f16_v4f16__5_7_7() { 14929; GFX900-LABEL: s_shuffle_v3f16_v4f16__5_7_7: 14930; GFX900: ; %bb.0: 14931; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14932; GFX900-NEXT: ;;#ASMSTART 14933; GFX900-NEXT: ; def s[4:5] 14934; GFX900-NEXT: ;;#ASMEND 14935; GFX900-NEXT: s_lshr_b32 s9, s5, 16 14936; GFX900-NEXT: s_lshr_b32 s4, s4, 16 14937; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14938; GFX900-NEXT: ;;#ASMSTART 14939; GFX900-NEXT: ; use s[8:9] 14940; GFX900-NEXT: ;;#ASMEND 14941; GFX900-NEXT: s_setpc_b64 s[30:31] 14942; 14943; GFX90A-LABEL: s_shuffle_v3f16_v4f16__5_7_7: 14944; GFX90A: ; %bb.0: 14945; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14946; GFX90A-NEXT: ;;#ASMSTART 14947; GFX90A-NEXT: ; def s[4:5] 14948; GFX90A-NEXT: ;;#ASMEND 14949; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 14950; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 14951; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s9 14952; GFX90A-NEXT: ;;#ASMSTART 14953; GFX90A-NEXT: ; use s[8:9] 14954; GFX90A-NEXT: ;;#ASMEND 14955; GFX90A-NEXT: s_setpc_b64 s[30:31] 14956; 14957; GFX940-LABEL: s_shuffle_v3f16_v4f16__5_7_7: 14958; GFX940: ; %bb.0: 14959; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14960; GFX940-NEXT: ;;#ASMSTART 14961; GFX940-NEXT: ; def s[0:1] 14962; GFX940-NEXT: ;;#ASMEND 14963; GFX940-NEXT: s_lshr_b32 s9, s1, 16 14964; GFX940-NEXT: s_lshr_b32 s0, s0, 16 14965; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s9 14966; GFX940-NEXT: ;;#ASMSTART 14967; GFX940-NEXT: ; use s[8:9] 14968; GFX940-NEXT: ;;#ASMEND 14969; GFX940-NEXT: s_setpc_b64 s[30:31] 14970 %vec0 = call <4 x half> asm "; def $0", "=s"() 14971 %vec1 = call <4 x half> asm "; def $0", "=s"() 14972 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 5, i32 7, i32 7> 14973 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 14974 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 14975 ret void 14976} 14977 14978define void @s_shuffle_v3f16_v4f16__6_7_7() { 14979; GFX900-LABEL: s_shuffle_v3f16_v4f16__6_7_7: 14980; GFX900: ; %bb.0: 14981; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14982; GFX900-NEXT: ;;#ASMSTART 14983; GFX900-NEXT: ; def s[4:5] 14984; GFX900-NEXT: ;;#ASMEND 14985; GFX900-NEXT: s_lshr_b32 s9, s5, 16 14986; GFX900-NEXT: s_mov_b32 s8, s5 14987; GFX900-NEXT: ;;#ASMSTART 14988; GFX900-NEXT: ; use s[8:9] 14989; GFX900-NEXT: ;;#ASMEND 14990; GFX900-NEXT: s_setpc_b64 s[30:31] 14991; 14992; GFX90A-LABEL: s_shuffle_v3f16_v4f16__6_7_7: 14993; GFX90A: ; %bb.0: 14994; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 14995; GFX90A-NEXT: ;;#ASMSTART 14996; GFX90A-NEXT: ; def s[4:5] 14997; GFX90A-NEXT: ;;#ASMEND 14998; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 14999; GFX90A-NEXT: s_mov_b32 s8, s5 15000; GFX90A-NEXT: ;;#ASMSTART 15001; GFX90A-NEXT: ; use s[8:9] 15002; GFX90A-NEXT: ;;#ASMEND 15003; GFX90A-NEXT: s_setpc_b64 s[30:31] 15004; 15005; GFX940-LABEL: s_shuffle_v3f16_v4f16__6_7_7: 15006; GFX940: ; %bb.0: 15007; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15008; GFX940-NEXT: ;;#ASMSTART 15009; GFX940-NEXT: ; def s[0:1] 15010; GFX940-NEXT: ;;#ASMEND 15011; GFX940-NEXT: s_lshr_b32 s9, s1, 16 15012; GFX940-NEXT: s_mov_b32 s8, s1 15013; GFX940-NEXT: ;;#ASMSTART 15014; GFX940-NEXT: ; use s[8:9] 15015; GFX940-NEXT: ;;#ASMEND 15016; GFX940-NEXT: s_setpc_b64 s[30:31] 15017 %vec0 = call <4 x half> asm "; def $0", "=s"() 15018 %vec1 = call <4 x half> asm "; def $0", "=s"() 15019 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 6, i32 7, i32 7> 15020 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15021 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15022 ret void 15023} 15024 15025define void @s_shuffle_v3f16_v4f16__7_u_7() { 15026; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_u_7: 15027; GFX900: ; %bb.0: 15028; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15029; GFX900-NEXT: ;;#ASMSTART 15030; GFX900-NEXT: ; def s[4:5] 15031; GFX900-NEXT: ;;#ASMEND 15032; GFX900-NEXT: s_lshr_b32 s8, s5, 16 15033; GFX900-NEXT: s_mov_b32 s9, s8 15034; GFX900-NEXT: ;;#ASMSTART 15035; GFX900-NEXT: ; use s[8:9] 15036; GFX900-NEXT: ;;#ASMEND 15037; GFX900-NEXT: s_setpc_b64 s[30:31] 15038; 15039; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_u_7: 15040; GFX90A: ; %bb.0: 15041; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15042; GFX90A-NEXT: ;;#ASMSTART 15043; GFX90A-NEXT: ; def s[4:5] 15044; GFX90A-NEXT: ;;#ASMEND 15045; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 15046; GFX90A-NEXT: s_mov_b32 s9, s8 15047; GFX90A-NEXT: ;;#ASMSTART 15048; GFX90A-NEXT: ; use s[8:9] 15049; GFX90A-NEXT: ;;#ASMEND 15050; GFX90A-NEXT: s_setpc_b64 s[30:31] 15051; 15052; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_u_7: 15053; GFX940: ; %bb.0: 15054; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15055; GFX940-NEXT: ;;#ASMSTART 15056; GFX940-NEXT: ; def s[0:1] 15057; GFX940-NEXT: ;;#ASMEND 15058; GFX940-NEXT: s_lshr_b32 s8, s1, 16 15059; GFX940-NEXT: s_mov_b32 s9, s8 15060; GFX940-NEXT: ;;#ASMSTART 15061; GFX940-NEXT: ; use s[8:9] 15062; GFX940-NEXT: ;;#ASMEND 15063; GFX940-NEXT: s_setpc_b64 s[30:31] 15064 %vec0 = call <4 x half> asm "; def $0", "=s"() 15065 %vec1 = call <4 x half> asm "; def $0", "=s"() 15066 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 poison, i32 7> 15067 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15068 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15069 ret void 15070} 15071 15072define void @s_shuffle_v3f16_v4f16__7_0_7() { 15073; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_0_7: 15074; GFX900: ; %bb.0: 15075; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15076; GFX900-NEXT: ;;#ASMSTART 15077; GFX900-NEXT: ; def s[6:7] 15078; GFX900-NEXT: ;;#ASMEND 15079; GFX900-NEXT: s_lshr_b32 s9, s7, 16 15080; GFX900-NEXT: ;;#ASMSTART 15081; GFX900-NEXT: ; def s[4:5] 15082; GFX900-NEXT: ;;#ASMEND 15083; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15084; GFX900-NEXT: ;;#ASMSTART 15085; GFX900-NEXT: ; use s[8:9] 15086; GFX900-NEXT: ;;#ASMEND 15087; GFX900-NEXT: s_setpc_b64 s[30:31] 15088; 15089; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_0_7: 15090; GFX90A: ; %bb.0: 15091; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15092; GFX90A-NEXT: ;;#ASMSTART 15093; GFX90A-NEXT: ; def s[6:7] 15094; GFX90A-NEXT: ;;#ASMEND 15095; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 15096; GFX90A-NEXT: ;;#ASMSTART 15097; GFX90A-NEXT: ; def s[4:5] 15098; GFX90A-NEXT: ;;#ASMEND 15099; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15100; GFX90A-NEXT: ;;#ASMSTART 15101; GFX90A-NEXT: ; use s[8:9] 15102; GFX90A-NEXT: ;;#ASMEND 15103; GFX90A-NEXT: s_setpc_b64 s[30:31] 15104; 15105; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_0_7: 15106; GFX940: ; %bb.0: 15107; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15108; GFX940-NEXT: ;;#ASMSTART 15109; GFX940-NEXT: ; def s[2:3] 15110; GFX940-NEXT: ;;#ASMEND 15111; GFX940-NEXT: s_lshr_b32 s9, s3, 16 15112; GFX940-NEXT: ;;#ASMSTART 15113; GFX940-NEXT: ; def s[0:1] 15114; GFX940-NEXT: ;;#ASMEND 15115; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 15116; GFX940-NEXT: ;;#ASMSTART 15117; GFX940-NEXT: ; use s[8:9] 15118; GFX940-NEXT: ;;#ASMEND 15119; GFX940-NEXT: s_setpc_b64 s[30:31] 15120 %vec0 = call <4 x half> asm "; def $0", "=s"() 15121 %vec1 = call <4 x half> asm "; def $0", "=s"() 15122 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 0, i32 7> 15123 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15124 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15125 ret void 15126} 15127 15128define void @s_shuffle_v3f16_v4f16__7_1_7() { 15129; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_1_7: 15130; GFX900: ; %bb.0: 15131; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15132; GFX900-NEXT: ;;#ASMSTART 15133; GFX900-NEXT: ; def s[4:5] 15134; GFX900-NEXT: ;;#ASMEND 15135; GFX900-NEXT: ;;#ASMSTART 15136; GFX900-NEXT: ; def s[6:7] 15137; GFX900-NEXT: ;;#ASMEND 15138; GFX900-NEXT: s_lshr_b32 s4, s4, 16 15139; GFX900-NEXT: s_lshr_b32 s9, s7, 16 15140; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15141; GFX900-NEXT: ;;#ASMSTART 15142; GFX900-NEXT: ; use s[8:9] 15143; GFX900-NEXT: ;;#ASMEND 15144; GFX900-NEXT: s_setpc_b64 s[30:31] 15145; 15146; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_1_7: 15147; GFX90A: ; %bb.0: 15148; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15149; GFX90A-NEXT: ;;#ASMSTART 15150; GFX90A-NEXT: ; def s[4:5] 15151; GFX90A-NEXT: ;;#ASMEND 15152; GFX90A-NEXT: ;;#ASMSTART 15153; GFX90A-NEXT: ; def s[6:7] 15154; GFX90A-NEXT: ;;#ASMEND 15155; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 15156; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 15157; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15158; GFX90A-NEXT: ;;#ASMSTART 15159; GFX90A-NEXT: ; use s[8:9] 15160; GFX90A-NEXT: ;;#ASMEND 15161; GFX90A-NEXT: s_setpc_b64 s[30:31] 15162; 15163; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_1_7: 15164; GFX940: ; %bb.0: 15165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15166; GFX940-NEXT: ;;#ASMSTART 15167; GFX940-NEXT: ; def s[0:1] 15168; GFX940-NEXT: ;;#ASMEND 15169; GFX940-NEXT: ;;#ASMSTART 15170; GFX940-NEXT: ; def s[2:3] 15171; GFX940-NEXT: ;;#ASMEND 15172; GFX940-NEXT: s_lshr_b32 s0, s0, 16 15173; GFX940-NEXT: s_lshr_b32 s9, s3, 16 15174; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 15175; GFX940-NEXT: ;;#ASMSTART 15176; GFX940-NEXT: ; use s[8:9] 15177; GFX940-NEXT: ;;#ASMEND 15178; GFX940-NEXT: s_setpc_b64 s[30:31] 15179 %vec0 = call <4 x half> asm "; def $0", "=s"() 15180 %vec1 = call <4 x half> asm "; def $0", "=s"() 15181 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 1, i32 7> 15182 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15183 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15184 ret void 15185} 15186 15187define void @s_shuffle_v3f16_v4f16__7_2_7() { 15188; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_2_7: 15189; GFX900: ; %bb.0: 15190; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15191; GFX900-NEXT: ;;#ASMSTART 15192; GFX900-NEXT: ; def s[6:7] 15193; GFX900-NEXT: ;;#ASMEND 15194; GFX900-NEXT: s_lshr_b32 s9, s7, 16 15195; GFX900-NEXT: ;;#ASMSTART 15196; GFX900-NEXT: ; def s[4:5] 15197; GFX900-NEXT: ;;#ASMEND 15198; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5 15199; GFX900-NEXT: ;;#ASMSTART 15200; GFX900-NEXT: ; use s[8:9] 15201; GFX900-NEXT: ;;#ASMEND 15202; GFX900-NEXT: s_setpc_b64 s[30:31] 15203; 15204; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_2_7: 15205; GFX90A: ; %bb.0: 15206; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15207; GFX90A-NEXT: ;;#ASMSTART 15208; GFX90A-NEXT: ; def s[6:7] 15209; GFX90A-NEXT: ;;#ASMEND 15210; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 15211; GFX90A-NEXT: ;;#ASMSTART 15212; GFX90A-NEXT: ; def s[4:5] 15213; GFX90A-NEXT: ;;#ASMEND 15214; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5 15215; GFX90A-NEXT: ;;#ASMSTART 15216; GFX90A-NEXT: ; use s[8:9] 15217; GFX90A-NEXT: ;;#ASMEND 15218; GFX90A-NEXT: s_setpc_b64 s[30:31] 15219; 15220; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_2_7: 15221; GFX940: ; %bb.0: 15222; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15223; GFX940-NEXT: ;;#ASMSTART 15224; GFX940-NEXT: ; def s[2:3] 15225; GFX940-NEXT: ;;#ASMEND 15226; GFX940-NEXT: s_lshr_b32 s9, s3, 16 15227; GFX940-NEXT: ;;#ASMSTART 15228; GFX940-NEXT: ; def s[0:1] 15229; GFX940-NEXT: ;;#ASMEND 15230; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 15231; GFX940-NEXT: ;;#ASMSTART 15232; GFX940-NEXT: ; use s[8:9] 15233; GFX940-NEXT: ;;#ASMEND 15234; GFX940-NEXT: s_setpc_b64 s[30:31] 15235 %vec0 = call <4 x half> asm "; def $0", "=s"() 15236 %vec1 = call <4 x half> asm "; def $0", "=s"() 15237 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 2, i32 7> 15238 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15239 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15240 ret void 15241} 15242 15243define void @s_shuffle_v3f16_v4f16__7_3_7() { 15244; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_3_7: 15245; GFX900: ; %bb.0: 15246; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15247; GFX900-NEXT: ;;#ASMSTART 15248; GFX900-NEXT: ; def s[4:5] 15249; GFX900-NEXT: ;;#ASMEND 15250; GFX900-NEXT: ;;#ASMSTART 15251; GFX900-NEXT: ; def s[6:7] 15252; GFX900-NEXT: ;;#ASMEND 15253; GFX900-NEXT: s_lshr_b32 s4, s5, 16 15254; GFX900-NEXT: s_lshr_b32 s9, s7, 16 15255; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15256; GFX900-NEXT: ;;#ASMSTART 15257; GFX900-NEXT: ; use s[8:9] 15258; GFX900-NEXT: ;;#ASMEND 15259; GFX900-NEXT: s_setpc_b64 s[30:31] 15260; 15261; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_3_7: 15262; GFX90A: ; %bb.0: 15263; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15264; GFX90A-NEXT: ;;#ASMSTART 15265; GFX90A-NEXT: ; def s[4:5] 15266; GFX90A-NEXT: ;;#ASMEND 15267; GFX90A-NEXT: ;;#ASMSTART 15268; GFX90A-NEXT: ; def s[6:7] 15269; GFX90A-NEXT: ;;#ASMEND 15270; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 15271; GFX90A-NEXT: s_lshr_b32 s9, s7, 16 15272; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15273; GFX90A-NEXT: ;;#ASMSTART 15274; GFX90A-NEXT: ; use s[8:9] 15275; GFX90A-NEXT: ;;#ASMEND 15276; GFX90A-NEXT: s_setpc_b64 s[30:31] 15277; 15278; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_3_7: 15279; GFX940: ; %bb.0: 15280; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15281; GFX940-NEXT: ;;#ASMSTART 15282; GFX940-NEXT: ; def s[0:1] 15283; GFX940-NEXT: ;;#ASMEND 15284; GFX940-NEXT: ;;#ASMSTART 15285; GFX940-NEXT: ; def s[2:3] 15286; GFX940-NEXT: ;;#ASMEND 15287; GFX940-NEXT: s_lshr_b32 s0, s1, 16 15288; GFX940-NEXT: s_lshr_b32 s9, s3, 16 15289; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 15290; GFX940-NEXT: ;;#ASMSTART 15291; GFX940-NEXT: ; use s[8:9] 15292; GFX940-NEXT: ;;#ASMEND 15293; GFX940-NEXT: s_setpc_b64 s[30:31] 15294 %vec0 = call <4 x half> asm "; def $0", "=s"() 15295 %vec1 = call <4 x half> asm "; def $0", "=s"() 15296 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 3, i32 7> 15297 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15298 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15299 ret void 15300} 15301 15302define void @s_shuffle_v3f16_v4f16__7_4_7() { 15303; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_4_7: 15304; GFX900: ; %bb.0: 15305; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15306; GFX900-NEXT: ;;#ASMSTART 15307; GFX900-NEXT: ; def s[4:5] 15308; GFX900-NEXT: ;;#ASMEND 15309; GFX900-NEXT: s_lshr_b32 s9, s5, 16 15310; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15311; GFX900-NEXT: ;;#ASMSTART 15312; GFX900-NEXT: ; use s[8:9] 15313; GFX900-NEXT: ;;#ASMEND 15314; GFX900-NEXT: s_setpc_b64 s[30:31] 15315; 15316; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_4_7: 15317; GFX90A: ; %bb.0: 15318; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15319; GFX90A-NEXT: ;;#ASMSTART 15320; GFX90A-NEXT: ; def s[4:5] 15321; GFX90A-NEXT: ;;#ASMEND 15322; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 15323; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15324; GFX90A-NEXT: ;;#ASMSTART 15325; GFX90A-NEXT: ; use s[8:9] 15326; GFX90A-NEXT: ;;#ASMEND 15327; GFX90A-NEXT: s_setpc_b64 s[30:31] 15328; 15329; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_4_7: 15330; GFX940: ; %bb.0: 15331; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15332; GFX940-NEXT: ;;#ASMSTART 15333; GFX940-NEXT: ; def s[0:1] 15334; GFX940-NEXT: ;;#ASMEND 15335; GFX940-NEXT: s_lshr_b32 s9, s1, 16 15336; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 15337; GFX940-NEXT: ;;#ASMSTART 15338; GFX940-NEXT: ; use s[8:9] 15339; GFX940-NEXT: ;;#ASMEND 15340; GFX940-NEXT: s_setpc_b64 s[30:31] 15341 %vec0 = call <4 x half> asm "; def $0", "=s"() 15342 %vec1 = call <4 x half> asm "; def $0", "=s"() 15343 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 4, i32 7> 15344 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15345 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15346 ret void 15347} 15348 15349define void @s_shuffle_v3f16_v4f16__7_5_7() { 15350; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_5_7: 15351; GFX900: ; %bb.0: 15352; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15353; GFX900-NEXT: ;;#ASMSTART 15354; GFX900-NEXT: ; def s[4:5] 15355; GFX900-NEXT: ;;#ASMEND 15356; GFX900-NEXT: s_lshr_b32 s4, s4, 16 15357; GFX900-NEXT: s_lshr_b32 s9, s5, 16 15358; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15359; GFX900-NEXT: ;;#ASMSTART 15360; GFX900-NEXT: ; use s[8:9] 15361; GFX900-NEXT: ;;#ASMEND 15362; GFX900-NEXT: s_setpc_b64 s[30:31] 15363; 15364; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_5_7: 15365; GFX90A: ; %bb.0: 15366; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15367; GFX90A-NEXT: ;;#ASMSTART 15368; GFX90A-NEXT: ; def s[4:5] 15369; GFX90A-NEXT: ;;#ASMEND 15370; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 15371; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 15372; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s4 15373; GFX90A-NEXT: ;;#ASMSTART 15374; GFX90A-NEXT: ; use s[8:9] 15375; GFX90A-NEXT: ;;#ASMEND 15376; GFX90A-NEXT: s_setpc_b64 s[30:31] 15377; 15378; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_5_7: 15379; GFX940: ; %bb.0: 15380; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15381; GFX940-NEXT: ;;#ASMSTART 15382; GFX940-NEXT: ; def s[0:1] 15383; GFX940-NEXT: ;;#ASMEND 15384; GFX940-NEXT: s_lshr_b32 s0, s0, 16 15385; GFX940-NEXT: s_lshr_b32 s9, s1, 16 15386; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s0 15387; GFX940-NEXT: ;;#ASMSTART 15388; GFX940-NEXT: ; use s[8:9] 15389; GFX940-NEXT: ;;#ASMEND 15390; GFX940-NEXT: s_setpc_b64 s[30:31] 15391 %vec0 = call <4 x half> asm "; def $0", "=s"() 15392 %vec1 = call <4 x half> asm "; def $0", "=s"() 15393 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 5, i32 7> 15394 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15395 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15396 ret void 15397} 15398 15399define void @s_shuffle_v3f16_v4f16__7_6_7() { 15400; GFX900-LABEL: s_shuffle_v3f16_v4f16__7_6_7: 15401; GFX900: ; %bb.0: 15402; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15403; GFX900-NEXT: ;;#ASMSTART 15404; GFX900-NEXT: ; def s[4:5] 15405; GFX900-NEXT: ;;#ASMEND 15406; GFX900-NEXT: s_lshr_b32 s9, s5, 16 15407; GFX900-NEXT: s_pack_ll_b32_b16 s8, s9, s5 15408; GFX900-NEXT: ;;#ASMSTART 15409; GFX900-NEXT: ; use s[8:9] 15410; GFX900-NEXT: ;;#ASMEND 15411; GFX900-NEXT: s_setpc_b64 s[30:31] 15412; 15413; GFX90A-LABEL: s_shuffle_v3f16_v4f16__7_6_7: 15414; GFX90A: ; %bb.0: 15415; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15416; GFX90A-NEXT: ;;#ASMSTART 15417; GFX90A-NEXT: ; def s[4:5] 15418; GFX90A-NEXT: ;;#ASMEND 15419; GFX90A-NEXT: s_lshr_b32 s9, s5, 16 15420; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s9, s5 15421; GFX90A-NEXT: ;;#ASMSTART 15422; GFX90A-NEXT: ; use s[8:9] 15423; GFX90A-NEXT: ;;#ASMEND 15424; GFX90A-NEXT: s_setpc_b64 s[30:31] 15425; 15426; GFX940-LABEL: s_shuffle_v3f16_v4f16__7_6_7: 15427; GFX940: ; %bb.0: 15428; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 15429; GFX940-NEXT: ;;#ASMSTART 15430; GFX940-NEXT: ; def s[0:1] 15431; GFX940-NEXT: ;;#ASMEND 15432; GFX940-NEXT: s_lshr_b32 s9, s1, 16 15433; GFX940-NEXT: s_pack_ll_b32_b16 s8, s9, s1 15434; GFX940-NEXT: ;;#ASMSTART 15435; GFX940-NEXT: ; use s[8:9] 15436; GFX940-NEXT: ;;#ASMEND 15437; GFX940-NEXT: s_setpc_b64 s[30:31] 15438 %vec0 = call <4 x half> asm "; def $0", "=s"() 15439 %vec1 = call <4 x half> asm "; def $0", "=s"() 15440 %shuf = shufflevector <4 x half> %vec0, <4 x half> %vec1, <3 x i32> <i32 7, i32 6, i32 7> 15441 %extend3 = shufflevector <3 x half> %shuf, <3 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison> 15442 call void asm sideeffect "; use $0", "{s[8:9]}"(<4 x half> %extend3) 15443 ret void 15444} 15445;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: 15446; GFX90APLUS: {{.*}} 15447