; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s

; Codegen tests for shufflevector producing <2 x half> from <3 x half>
; operands, run against gfx900, gfx90a and gfx940.
;
; Naming: v_shuffle_v2f16_v3f16__A_B uses the shuffle mask <A, B>, where "u"
; marks a poison mask element. Mask indices 0-2 select from the first
; <3 x half> operand and 3-5 from the second. Every <3 x half> input is the
; first three lanes of a <4 x half> value defined by inline asm.
;
; The check lines are generated; regenerate them with
; utils/update_llc_test_checks.py rather than editing them by hand.

; Mask poison: every result lane is poison; the checks expect no store.
define void @v_shuffle_v2f16_v3f16__u_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v2f16_v3f16__u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> poison
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <0, poison>: lane 0 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__0_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <1, poison>: lane 0 = %extract3[1].
define void @v_shuffle_v2f16_v3f16__1_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <2, poison>: lane 0 = %extract3[2].
define void @v_shuffle_v2f16_v3f16__2_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <3, poison>: index 3 addresses lane 0 of the second operand, which is
; poison in this function, so the checks expect no store.
; NOTE(review): the 4_u/5_u tests pass %extract31 as the second operand;
; confirm that a poison second operand is intended here.
define void @v_shuffle_v2f16_v3f16__3_u(ptr addrspace(1) inreg %ptr) {
; GFX9-LABEL: v_shuffle_v2f16_v3f16__3_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <4, poison>: lane 0 = %extract31[1].
define void @v_shuffle_v2f16_v3f16__4_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, s4, v0, 16
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, s0, v0, 16
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, poison>: lane 0 = %extract31[2].
define void @v_shuffle_v2f16_v3f16__5_u(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 poison>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 0>: lane 0 = %extract31[2], lane 1 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__5_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v0, v2, s4
; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v0, v3, s4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v3, s2
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 1>: lane 0 = %extract31[2], lane 1 = %extract3[1].
define void @v_shuffle_v2f16_v3f16__5_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v0, s4, v2, v0
; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v3, v0
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v3, v0
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 1>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 2>: lane 0 = %extract31[2], lane 1 = %extract3[2].
define void @v_shuffle_v2f16_v3f16__5_2(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_2:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_2:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v1, v3, s4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_2:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v3, s2
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 2>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 3>: lane 0 = %extract31[2], lane 1 = %extract31[0].
define void @v_shuffle_v2f16_v3f16__5_3(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_3:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_3:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_3:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 3>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 4>: lane 0 = %extract31[2], lane 1 = %extract31[1].
define void @v_shuffle_v2f16_v3f16__5_4(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_4:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_4:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v0
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_4:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v0
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 4>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 5>: both lanes = %extract31[2].
define void @v_shuffle_v2f16_v3f16__5_5(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__5_5:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__5_5:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v1, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__5_5:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v1, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 5>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <poison, 0>: lane 1 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__u_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask zeroinitializer (<0, 0>): both lanes = %extract3[0].
define void @v_shuffle_v2f16_v3f16__0_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v0, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <1, 0>: lane 0 = %extract3[1], lane 1 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__1_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v0, 16
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <2, 0>: lane 0 = %extract3[2], lane 1 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__2_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x5040100
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x5040100
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v1, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x5040100
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v0, v1, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <3, 0>: lane 1 = %extract3[0]. Index 3 addresses lane 0 of the second
; operand, which is poison in this function.
; NOTE(review): the 4_0 test passes %extract31 as the second operand; confirm
; that a poison second operand is intended here.
define void @v_shuffle_v2f16_v3f16__3_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_lshlrev_b32_e32 v0, 16, v0
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <4, 0>: lane 0 = %extract31[1], lane 1 = %extract3[0].
define void @v_shuffle_v2f16_v3f16__4_0(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_0:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_mov_b32_e32 v3, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[1:2]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_alignbit_b32 v0, v0, v1, 16
; GFX900-NEXT:    global_store_dword v3, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_0:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_alignbit_b32 v0, v0, v2, 16
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_0:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_alignbit_b32 v0, v0, v2, 16
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %vec1 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 0>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <poison, 1>: lane 1 = %extract3[1].
define void @v_shuffle_v2f16_v3f16__u_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 1>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <0, 1>: lane 0 = %extract3[0], lane 1 = %extract3[1].
define void @v_shuffle_v2f16_v3f16__0_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=v"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 1>
  store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

define void @v_shuffle_v2f16_v3f16__1_1(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_1:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v0, v0, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_1:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v0, v0, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_1:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:
s_mov_b32 s2, 0x7060302 949; GFX940-NEXT: v_mov_b32_e32 v2, 0 950; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 951; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 952; GFX940-NEXT: s_waitcnt vmcnt(0) 953; GFX940-NEXT: s_setpc_b64 s[30:31] 954 %vec0 = call <4 x half> asm "; def $0", "=v"() 955 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 956 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 1> 957 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 958 ret void 959} 960 961define void @v_shuffle_v2f16_v3f16__2_1(ptr addrspace(1) inreg %ptr) { 962; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_1: 963; GFX900: ; %bb.0: 964; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 965; GFX900-NEXT: ;;#ASMSTART 966; GFX900-NEXT: ; def v[0:1] 967; GFX900-NEXT: ;;#ASMEND 968; GFX900-NEXT: s_mov_b32 s4, 0xffff 969; GFX900-NEXT: v_mov_b32_e32 v2, 0 970; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 971; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 972; GFX900-NEXT: s_waitcnt vmcnt(0) 973; GFX900-NEXT: s_setpc_b64 s[30:31] 974; 975; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_1: 976; GFX90A: ; %bb.0: 977; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 978; GFX90A-NEXT: ;;#ASMSTART 979; GFX90A-NEXT: ; def v[0:1] 980; GFX90A-NEXT: ;;#ASMEND 981; GFX90A-NEXT: s_mov_b32 s4, 0xffff 982; GFX90A-NEXT: v_mov_b32_e32 v2, 0 983; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 984; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 985; GFX90A-NEXT: s_waitcnt vmcnt(0) 986; GFX90A-NEXT: s_setpc_b64 s[30:31] 987; 988; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_1: 989; GFX940: ; %bb.0: 990; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 991; GFX940-NEXT: ;;#ASMSTART 992; GFX940-NEXT: ; def v[0:1] 993; GFX940-NEXT: ;;#ASMEND 994; GFX940-NEXT: s_mov_b32 s2, 0xffff 995; GFX940-NEXT: v_mov_b32_e32 v2, 0 996; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 997; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 998; GFX940-NEXT: 
s_waitcnt vmcnt(0) 999; GFX940-NEXT: s_setpc_b64 s[30:31] 1000 %vec0 = call <4 x half> asm "; def $0", "=v"() 1001 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1002 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 1> 1003 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1004 ret void 1005} 1006 1007define void @v_shuffle_v2f16_v3f16__3_1(ptr addrspace(1) inreg %ptr) { 1008; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_1: 1009; GFX900: ; %bb.0: 1010; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1011; GFX900-NEXT: v_mov_b32_e32 v2, 0 1012; GFX900-NEXT: ;;#ASMSTART 1013; GFX900-NEXT: ; def v[0:1] 1014; GFX900-NEXT: ;;#ASMEND 1015; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1016; GFX900-NEXT: s_waitcnt vmcnt(0) 1017; GFX900-NEXT: s_setpc_b64 s[30:31] 1018; 1019; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_1: 1020; GFX90A: ; %bb.0: 1021; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1022; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1023; GFX90A-NEXT: ;;#ASMSTART 1024; GFX90A-NEXT: ; def v[0:1] 1025; GFX90A-NEXT: ;;#ASMEND 1026; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1027; GFX90A-NEXT: s_waitcnt vmcnt(0) 1028; GFX90A-NEXT: s_setpc_b64 s[30:31] 1029; 1030; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_1: 1031; GFX940: ; %bb.0: 1032; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1033; GFX940-NEXT: v_mov_b32_e32 v2, 0 1034; GFX940-NEXT: ;;#ASMSTART 1035; GFX940-NEXT: ; def v[0:1] 1036; GFX940-NEXT: ;;#ASMEND 1037; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1038; GFX940-NEXT: s_waitcnt vmcnt(0) 1039; GFX940-NEXT: s_setpc_b64 s[30:31] 1040 %vec0 = call <4 x half> asm "; def $0", "=v"() 1041 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1042 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 1> 1043 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1044 ret void 1045} 1046 
1047define void @v_shuffle_v2f16_v3f16__4_1(ptr addrspace(1) inreg %ptr) { 1048; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_1: 1049; GFX900: ; %bb.0: 1050; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1051; GFX900-NEXT: ;;#ASMSTART 1052; GFX900-NEXT: ; def v[0:1] 1053; GFX900-NEXT: ;;#ASMEND 1054; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1055; GFX900-NEXT: v_mov_b32_e32 v3, 0 1056; GFX900-NEXT: ;;#ASMSTART 1057; GFX900-NEXT: ; def v[1:2] 1058; GFX900-NEXT: ;;#ASMEND 1059; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 1060; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1061; GFX900-NEXT: s_waitcnt vmcnt(0) 1062; GFX900-NEXT: s_setpc_b64 s[30:31] 1063; 1064; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_1: 1065; GFX90A: ; %bb.0: 1066; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1067; GFX90A-NEXT: ;;#ASMSTART 1068; GFX90A-NEXT: ; def v[0:1] 1069; GFX90A-NEXT: ;;#ASMEND 1070; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1071; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1072; GFX90A-NEXT: ;;#ASMSTART 1073; GFX90A-NEXT: ; def v[2:3] 1074; GFX90A-NEXT: ;;#ASMEND 1075; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 1076; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1077; GFX90A-NEXT: s_waitcnt vmcnt(0) 1078; GFX90A-NEXT: s_setpc_b64 s[30:31] 1079; 1080; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_1: 1081; GFX940: ; %bb.0: 1082; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1083; GFX940-NEXT: ;;#ASMSTART 1084; GFX940-NEXT: ; def v[0:1] 1085; GFX940-NEXT: ;;#ASMEND 1086; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1087; GFX940-NEXT: v_mov_b32_e32 v4, 0 1088; GFX940-NEXT: ;;#ASMSTART 1089; GFX940-NEXT: ; def v[2:3] 1090; GFX940-NEXT: ;;#ASMEND 1091; GFX940-NEXT: s_nop 0 1092; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 1093; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1094; GFX940-NEXT: s_waitcnt vmcnt(0) 1095; GFX940-NEXT: s_setpc_b64 s[30:31] 1096 %vec0 = call <4 x half> asm "; def $0", "=v"() 1097 %vec1 = call <4 x half> asm "; def $0", "=v"() 1098 %extract3 = shufflevector <4 x half> %vec0, <4 x 
half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1099 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1100 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 1> 1101 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1102 ret void 1103} 1104 1105define void @v_shuffle_v2f16_v3f16__u_2(ptr addrspace(1) inreg %ptr) { 1106; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_2: 1107; GFX900: ; %bb.0: 1108; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1109; GFX900-NEXT: ;;#ASMSTART 1110; GFX900-NEXT: ; def v[0:1] 1111; GFX900-NEXT: ;;#ASMEND 1112; GFX900-NEXT: v_mov_b32_e32 v2, 0 1113; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1114; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1115; GFX900-NEXT: s_waitcnt vmcnt(0) 1116; GFX900-NEXT: s_setpc_b64 s[30:31] 1117; 1118; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_2: 1119; GFX90A: ; %bb.0: 1120; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1121; GFX90A-NEXT: ;;#ASMSTART 1122; GFX90A-NEXT: ; def v[0:1] 1123; GFX90A-NEXT: ;;#ASMEND 1124; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1125; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1126; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1127; GFX90A-NEXT: s_waitcnt vmcnt(0) 1128; GFX90A-NEXT: s_setpc_b64 s[30:31] 1129; 1130; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_2: 1131; GFX940: ; %bb.0: 1132; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1133; GFX940-NEXT: ;;#ASMSTART 1134; GFX940-NEXT: ; def v[0:1] 1135; GFX940-NEXT: ;;#ASMEND 1136; GFX940-NEXT: v_mov_b32_e32 v2, 0 1137; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1138; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1139; GFX940-NEXT: s_waitcnt vmcnt(0) 1140; GFX940-NEXT: s_setpc_b64 s[30:31] 1141 %vec0 = call <4 x half> asm "; def $0", "=v"() 1142 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1143 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 2> 
1144 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1145 ret void 1146} 1147 1148define void @v_shuffle_v2f16_v3f16__0_2(ptr addrspace(1) inreg %ptr) { 1149; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_2: 1150; GFX900: ; %bb.0: 1151; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1152; GFX900-NEXT: ;;#ASMSTART 1153; GFX900-NEXT: ; def v[0:1] 1154; GFX900-NEXT: ;;#ASMEND 1155; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1156; GFX900-NEXT: v_mov_b32_e32 v2, 0 1157; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 1158; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1159; GFX900-NEXT: s_waitcnt vmcnt(0) 1160; GFX900-NEXT: s_setpc_b64 s[30:31] 1161; 1162; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_2: 1163; GFX90A: ; %bb.0: 1164; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1165; GFX90A-NEXT: ;;#ASMSTART 1166; GFX90A-NEXT: ; def v[0:1] 1167; GFX90A-NEXT: ;;#ASMEND 1168; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1169; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1170; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 1171; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1172; GFX90A-NEXT: s_waitcnt vmcnt(0) 1173; GFX90A-NEXT: s_setpc_b64 s[30:31] 1174; 1175; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_2: 1176; GFX940: ; %bb.0: 1177; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1178; GFX940-NEXT: ;;#ASMSTART 1179; GFX940-NEXT: ; def v[0:1] 1180; GFX940-NEXT: ;;#ASMEND 1181; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1182; GFX940-NEXT: v_mov_b32_e32 v2, 0 1183; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 1184; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1185; GFX940-NEXT: s_waitcnt vmcnt(0) 1186; GFX940-NEXT: s_setpc_b64 s[30:31] 1187 %vec0 = call <4 x half> asm "; def $0", "=v"() 1188 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1189 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 2> 1190 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1191 ret void 1192} 1193 1194define void 
@v_shuffle_v2f16_v3f16__1_2(ptr addrspace(1) inreg %ptr) { 1195; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_2: 1196; GFX900: ; %bb.0: 1197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1198; GFX900-NEXT: ;;#ASMSTART 1199; GFX900-NEXT: ; def v[0:1] 1200; GFX900-NEXT: ;;#ASMEND 1201; GFX900-NEXT: v_mov_b32_e32 v2, 0 1202; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 1203; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1204; GFX900-NEXT: s_waitcnt vmcnt(0) 1205; GFX900-NEXT: s_setpc_b64 s[30:31] 1206; 1207; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_2: 1208; GFX90A: ; %bb.0: 1209; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1210; GFX90A-NEXT: ;;#ASMSTART 1211; GFX90A-NEXT: ; def v[0:1] 1212; GFX90A-NEXT: ;;#ASMEND 1213; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1214; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 1215; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1216; GFX90A-NEXT: s_waitcnt vmcnt(0) 1217; GFX90A-NEXT: s_setpc_b64 s[30:31] 1218; 1219; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_2: 1220; GFX940: ; %bb.0: 1221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1222; GFX940-NEXT: ;;#ASMSTART 1223; GFX940-NEXT: ; def v[0:1] 1224; GFX940-NEXT: ;;#ASMEND 1225; GFX940-NEXT: v_mov_b32_e32 v2, 0 1226; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 1227; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1228; GFX940-NEXT: s_waitcnt vmcnt(0) 1229; GFX940-NEXT: s_setpc_b64 s[30:31] 1230 %vec0 = call <4 x half> asm "; def $0", "=v"() 1231 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1232 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 2> 1233 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1234 ret void 1235} 1236 1237define void @v_shuffle_v2f16_v3f16__2_2(ptr addrspace(1) inreg %ptr) { 1238; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_2: 1239; GFX900: ; %bb.0: 1240; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1241; GFX900-NEXT: ;;#ASMSTART 1242; GFX900-NEXT: ; def 
v[0:1] 1243; GFX900-NEXT: ;;#ASMEND 1244; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1245; GFX900-NEXT: v_mov_b32_e32 v2, 0 1246; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1247; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1248; GFX900-NEXT: s_waitcnt vmcnt(0) 1249; GFX900-NEXT: s_setpc_b64 s[30:31] 1250; 1251; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_2: 1252; GFX90A: ; %bb.0: 1253; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1254; GFX90A-NEXT: ;;#ASMSTART 1255; GFX90A-NEXT: ; def v[0:1] 1256; GFX90A-NEXT: ;;#ASMEND 1257; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1258; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1259; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1260; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1261; GFX90A-NEXT: s_waitcnt vmcnt(0) 1262; GFX90A-NEXT: s_setpc_b64 s[30:31] 1263; 1264; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_2: 1265; GFX940: ; %bb.0: 1266; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1267; GFX940-NEXT: ;;#ASMSTART 1268; GFX940-NEXT: ; def v[0:1] 1269; GFX940-NEXT: ;;#ASMEND 1270; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1271; GFX940-NEXT: v_mov_b32_e32 v2, 0 1272; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1273; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1274; GFX940-NEXT: s_waitcnt vmcnt(0) 1275; GFX940-NEXT: s_setpc_b64 s[30:31] 1276 %vec0 = call <4 x half> asm "; def $0", "=v"() 1277 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1278 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 2> 1279 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1280 ret void 1281} 1282 1283define void @v_shuffle_v2f16_v3f16__3_2(ptr addrspace(1) inreg %ptr) { 1284; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_2: 1285; GFX900: ; %bb.0: 1286; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1287; GFX900-NEXT: ;;#ASMSTART 1288; GFX900-NEXT: ; def v[0:1] 1289; GFX900-NEXT: ;;#ASMEND 1290; GFX900-NEXT: v_mov_b32_e32 v2, 0 1291; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1292; 
GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1293; GFX900-NEXT: s_waitcnt vmcnt(0) 1294; GFX900-NEXT: s_setpc_b64 s[30:31] 1295; 1296; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_2: 1297; GFX90A: ; %bb.0: 1298; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1299; GFX90A-NEXT: ;;#ASMSTART 1300; GFX90A-NEXT: ; def v[0:1] 1301; GFX90A-NEXT: ;;#ASMEND 1302; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1303; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1304; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1305; GFX90A-NEXT: s_waitcnt vmcnt(0) 1306; GFX90A-NEXT: s_setpc_b64 s[30:31] 1307; 1308; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_2: 1309; GFX940: ; %bb.0: 1310; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1311; GFX940-NEXT: ;;#ASMSTART 1312; GFX940-NEXT: ; def v[0:1] 1313; GFX940-NEXT: ;;#ASMEND 1314; GFX940-NEXT: v_mov_b32_e32 v2, 0 1315; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1316; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1317; GFX940-NEXT: s_waitcnt vmcnt(0) 1318; GFX940-NEXT: s_setpc_b64 s[30:31] 1319 %vec0 = call <4 x half> asm "; def $0", "=v"() 1320 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1321 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 2> 1322 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1323 ret void 1324} 1325 1326define void @v_shuffle_v2f16_v3f16__4_2(ptr addrspace(1) inreg %ptr) { 1327; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_2: 1328; GFX900: ; %bb.0: 1329; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1330; GFX900-NEXT: ;;#ASMSTART 1331; GFX900-NEXT: ; def v[0:1] 1332; GFX900-NEXT: ;;#ASMEND 1333; GFX900-NEXT: v_mov_b32_e32 v4, 0 1334; GFX900-NEXT: ;;#ASMSTART 1335; GFX900-NEXT: ; def v[2:3] 1336; GFX900-NEXT: ;;#ASMEND 1337; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 1338; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1339; GFX900-NEXT: s_waitcnt vmcnt(0) 1340; GFX900-NEXT: s_setpc_b64 s[30:31] 1341; 1342; GFX90A-LABEL: 
v_shuffle_v2f16_v3f16__4_2: 1343; GFX90A: ; %bb.0: 1344; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1345; GFX90A-NEXT: ;;#ASMSTART 1346; GFX90A-NEXT: ; def v[0:1] 1347; GFX90A-NEXT: ;;#ASMEND 1348; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1349; GFX90A-NEXT: ;;#ASMSTART 1350; GFX90A-NEXT: ; def v[2:3] 1351; GFX90A-NEXT: ;;#ASMEND 1352; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 1353; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1354; GFX90A-NEXT: s_waitcnt vmcnt(0) 1355; GFX90A-NEXT: s_setpc_b64 s[30:31] 1356; 1357; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_2: 1358; GFX940: ; %bb.0: 1359; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1360; GFX940-NEXT: ;;#ASMSTART 1361; GFX940-NEXT: ; def v[0:1] 1362; GFX940-NEXT: ;;#ASMEND 1363; GFX940-NEXT: v_mov_b32_e32 v4, 0 1364; GFX940-NEXT: ;;#ASMSTART 1365; GFX940-NEXT: ; def v[2:3] 1366; GFX940-NEXT: ;;#ASMEND 1367; GFX940-NEXT: s_nop 0 1368; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 1369; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1370; GFX940-NEXT: s_waitcnt vmcnt(0) 1371; GFX940-NEXT: s_setpc_b64 s[30:31] 1372 %vec0 = call <4 x half> asm "; def $0", "=v"() 1373 %vec1 = call <4 x half> asm "; def $0", "=v"() 1374 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1375 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1376 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 2> 1377 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1378 ret void 1379} 1380 1381define void @v_shuffle_v2f16_v3f16__u_3(ptr addrspace(1) inreg %ptr) { 1382; GFX9-LABEL: v_shuffle_v2f16_v3f16__u_3: 1383; GFX9: ; %bb.0: 1384; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1385; GFX9-NEXT: s_setpc_b64 s[30:31] 1386 %vec0 = call <4 x half> asm "; def $0", "=v"() 1387 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1388 %shuf = shufflevector <3 x 
half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 3> 1389 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1390 ret void 1391} 1392 1393define void @v_shuffle_v2f16_v3f16__0_3(ptr addrspace(1) inreg %ptr) { 1394; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_3: 1395; GFX900: ; %bb.0: 1396; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1397; GFX900-NEXT: v_mov_b32_e32 v2, 0 1398; GFX900-NEXT: ;;#ASMSTART 1399; GFX900-NEXT: ; def v[0:1] 1400; GFX900-NEXT: ;;#ASMEND 1401; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1402; GFX900-NEXT: s_waitcnt vmcnt(0) 1403; GFX900-NEXT: s_setpc_b64 s[30:31] 1404; 1405; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_3: 1406; GFX90A: ; %bb.0: 1407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1408; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1409; GFX90A-NEXT: ;;#ASMSTART 1410; GFX90A-NEXT: ; def v[0:1] 1411; GFX90A-NEXT: ;;#ASMEND 1412; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1413; GFX90A-NEXT: s_waitcnt vmcnt(0) 1414; GFX90A-NEXT: s_setpc_b64 s[30:31] 1415; 1416; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_3: 1417; GFX940: ; %bb.0: 1418; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1419; GFX940-NEXT: v_mov_b32_e32 v2, 0 1420; GFX940-NEXT: ;;#ASMSTART 1421; GFX940-NEXT: ; def v[0:1] 1422; GFX940-NEXT: ;;#ASMEND 1423; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1424; GFX940-NEXT: s_waitcnt vmcnt(0) 1425; GFX940-NEXT: s_setpc_b64 s[30:31] 1426 %vec0 = call <4 x half> asm "; def $0", "=v"() 1427 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1428 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 3> 1429 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1430 ret void 1431} 1432 1433define void @v_shuffle_v2f16_v3f16__1_3(ptr addrspace(1) inreg %ptr) { 1434; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_3: 1435; GFX900: ; %bb.0: 1436; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1437; GFX900-NEXT: ;;#ASMSTART 1438; 
GFX900-NEXT: ; def v[0:1] 1439; GFX900-NEXT: ;;#ASMEND 1440; GFX900-NEXT: v_mov_b32_e32 v2, 0 1441; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 1442; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1443; GFX900-NEXT: s_waitcnt vmcnt(0) 1444; GFX900-NEXT: s_setpc_b64 s[30:31] 1445; 1446; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_3: 1447; GFX90A: ; %bb.0: 1448; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1449; GFX90A-NEXT: ;;#ASMSTART 1450; GFX90A-NEXT: ; def v[0:1] 1451; GFX90A-NEXT: ;;#ASMEND 1452; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1453; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 1454; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1455; GFX90A-NEXT: s_waitcnt vmcnt(0) 1456; GFX90A-NEXT: s_setpc_b64 s[30:31] 1457; 1458; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_3: 1459; GFX940: ; %bb.0: 1460; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1461; GFX940-NEXT: ;;#ASMSTART 1462; GFX940-NEXT: ; def v[0:1] 1463; GFX940-NEXT: ;;#ASMEND 1464; GFX940-NEXT: v_mov_b32_e32 v2, 0 1465; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 1466; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1467; GFX940-NEXT: s_waitcnt vmcnt(0) 1468; GFX940-NEXT: s_setpc_b64 s[30:31] 1469 %vec0 = call <4 x half> asm "; def $0", "=v"() 1470 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1471 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 3> 1472 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1473 ret void 1474} 1475 1476define void @v_shuffle_v2f16_v3f16__2_3(ptr addrspace(1) inreg %ptr) { 1477; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_3: 1478; GFX900: ; %bb.0: 1479; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1480; GFX900-NEXT: v_mov_b32_e32 v2, 0 1481; GFX900-NEXT: ;;#ASMSTART 1482; GFX900-NEXT: ; def v[0:1] 1483; GFX900-NEXT: ;;#ASMEND 1484; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1485; GFX900-NEXT: s_waitcnt vmcnt(0) 1486; GFX900-NEXT: s_setpc_b64 s[30:31] 1487; 1488; 
GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_3: 1489; GFX90A: ; %bb.0: 1490; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1491; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1492; GFX90A-NEXT: ;;#ASMSTART 1493; GFX90A-NEXT: ; def v[0:1] 1494; GFX90A-NEXT: ;;#ASMEND 1495; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1496; GFX90A-NEXT: s_waitcnt vmcnt(0) 1497; GFX90A-NEXT: s_setpc_b64 s[30:31] 1498; 1499; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_3: 1500; GFX940: ; %bb.0: 1501; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1502; GFX940-NEXT: v_mov_b32_e32 v2, 0 1503; GFX940-NEXT: ;;#ASMSTART 1504; GFX940-NEXT: ; def v[0:1] 1505; GFX940-NEXT: ;;#ASMEND 1506; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1507; GFX940-NEXT: s_waitcnt vmcnt(0) 1508; GFX940-NEXT: s_setpc_b64 s[30:31] 1509 %vec0 = call <4 x half> asm "; def $0", "=v"() 1510 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1511 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 3> 1512 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1513 ret void 1514} 1515 1516define void @v_shuffle_v2f16_v3f16__3_3(ptr addrspace(1) inreg %ptr) { 1517; GFX9-LABEL: v_shuffle_v2f16_v3f16__3_3: 1518; GFX9: ; %bb.0: 1519; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1520; GFX9-NEXT: s_setpc_b64 s[30:31] 1521 %vec0 = call <4 x half> asm "; def $0", "=v"() 1522 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1523 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 3> 1524 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1525 ret void 1526} 1527 1528define void @v_shuffle_v2f16_v3f16__4_3(ptr addrspace(1) inreg %ptr) { 1529; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_3: 1530; GFX900: ; %bb.0: 1531; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1532; GFX900-NEXT: ;;#ASMSTART 1533; GFX900-NEXT: ; def v[0:1] 1534; GFX900-NEXT: ;;#ASMEND 1535; 
GFX900-NEXT: v_mov_b32_e32 v2, 0 1536; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 1537; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1538; GFX900-NEXT: s_waitcnt vmcnt(0) 1539; GFX900-NEXT: s_setpc_b64 s[30:31] 1540; 1541; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_3: 1542; GFX90A: ; %bb.0: 1543; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1544; GFX90A-NEXT: ;;#ASMSTART 1545; GFX90A-NEXT: ; def v[0:1] 1546; GFX90A-NEXT: ;;#ASMEND 1547; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1548; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 1549; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1550; GFX90A-NEXT: s_waitcnt vmcnt(0) 1551; GFX90A-NEXT: s_setpc_b64 s[30:31] 1552; 1553; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_3: 1554; GFX940: ; %bb.0: 1555; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1556; GFX940-NEXT: ;;#ASMSTART 1557; GFX940-NEXT: ; def v[0:1] 1558; GFX940-NEXT: ;;#ASMEND 1559; GFX940-NEXT: v_mov_b32_e32 v2, 0 1560; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 1561; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1562; GFX940-NEXT: s_waitcnt vmcnt(0) 1563; GFX940-NEXT: s_setpc_b64 s[30:31] 1564 %vec0 = call <4 x half> asm "; def $0", "=v"() 1565 %vec1 = call <4 x half> asm "; def $0", "=v"() 1566 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1567 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1568 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 3> 1569 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1570 ret void 1571} 1572 1573define void @v_shuffle_v2f16_v3f16__u_4(ptr addrspace(1) inreg %ptr) { 1574; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_4: 1575; GFX900: ; %bb.0: 1576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1577; GFX900-NEXT: v_mov_b32_e32 v2, 0 1578; GFX900-NEXT: ;;#ASMSTART 1579; GFX900-NEXT: ; def v[0:1] 1580; GFX900-NEXT: ;;#ASMEND 1581; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 
1582; GFX900-NEXT: s_waitcnt vmcnt(0) 1583; GFX900-NEXT: s_setpc_b64 s[30:31] 1584; 1585; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_4: 1586; GFX90A: ; %bb.0: 1587; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1588; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1589; GFX90A-NEXT: ;;#ASMSTART 1590; GFX90A-NEXT: ; def v[0:1] 1591; GFX90A-NEXT: ;;#ASMEND 1592; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1593; GFX90A-NEXT: s_waitcnt vmcnt(0) 1594; GFX90A-NEXT: s_setpc_b64 s[30:31] 1595; 1596; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_4: 1597; GFX940: ; %bb.0: 1598; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1599; GFX940-NEXT: v_mov_b32_e32 v2, 0 1600; GFX940-NEXT: ;;#ASMSTART 1601; GFX940-NEXT: ; def v[0:1] 1602; GFX940-NEXT: ;;#ASMEND 1603; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1604; GFX940-NEXT: s_waitcnt vmcnt(0) 1605; GFX940-NEXT: s_setpc_b64 s[30:31] 1606 %vec0 = call <4 x half> asm "; def $0", "=v"() 1607 %vec1 = call <4 x half> asm "; def $0", "=v"() 1608 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1609 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1610 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 4> 1611 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1612 ret void 1613} 1614 1615define void @v_shuffle_v2f16_v3f16__0_4(ptr addrspace(1) inreg %ptr) { 1616; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_4: 1617; GFX900: ; %bb.0: 1618; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1619; GFX900-NEXT: ;;#ASMSTART 1620; GFX900-NEXT: ; def v[0:1] 1621; GFX900-NEXT: ;;#ASMEND 1622; GFX900-NEXT: s_mov_b32 s4, 0xffff 1623; GFX900-NEXT: v_mov_b32_e32 v3, 0 1624; GFX900-NEXT: ;;#ASMSTART 1625; GFX900-NEXT: ; def v[1:2] 1626; GFX900-NEXT: ;;#ASMEND 1627; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 1628; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1629; GFX900-NEXT: s_waitcnt vmcnt(0) 1630; GFX900-NEXT: 
s_setpc_b64 s[30:31] 1631; 1632; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_4: 1633; GFX90A: ; %bb.0: 1634; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1635; GFX90A-NEXT: ;;#ASMSTART 1636; GFX90A-NEXT: ; def v[0:1] 1637; GFX90A-NEXT: ;;#ASMEND 1638; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1639; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1640; GFX90A-NEXT: ;;#ASMSTART 1641; GFX90A-NEXT: ; def v[2:3] 1642; GFX90A-NEXT: ;;#ASMEND 1643; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 1644; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1645; GFX90A-NEXT: s_waitcnt vmcnt(0) 1646; GFX90A-NEXT: s_setpc_b64 s[30:31] 1647; 1648; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_4: 1649; GFX940: ; %bb.0: 1650; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1651; GFX940-NEXT: ;;#ASMSTART 1652; GFX940-NEXT: ; def v[0:1] 1653; GFX940-NEXT: ;;#ASMEND 1654; GFX940-NEXT: s_mov_b32 s2, 0xffff 1655; GFX940-NEXT: v_mov_b32_e32 v4, 0 1656; GFX940-NEXT: ;;#ASMSTART 1657; GFX940-NEXT: ; def v[2:3] 1658; GFX940-NEXT: ;;#ASMEND 1659; GFX940-NEXT: s_nop 0 1660; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 1661; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1662; GFX940-NEXT: s_waitcnt vmcnt(0) 1663; GFX940-NEXT: s_setpc_b64 s[30:31] 1664 %vec0 = call <4 x half> asm "; def $0", "=v"() 1665 %vec1 = call <4 x half> asm "; def $0", "=v"() 1666 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1667 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1668 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 4> 1669 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1670 ret void 1671} 1672 1673define void @v_shuffle_v2f16_v3f16__1_4(ptr addrspace(1) inreg %ptr) { 1674; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_4: 1675; GFX900: ; %bb.0: 1676; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1677; GFX900-NEXT: ;;#ASMSTART 1678; GFX900-NEXT: ; def v[0:1] 1679; GFX900-NEXT: ;;#ASMEND 1680; 
GFX900-NEXT: s_mov_b32 s4, 0x7060302 1681; GFX900-NEXT: v_mov_b32_e32 v3, 0 1682; GFX900-NEXT: ;;#ASMSTART 1683; GFX900-NEXT: ; def v[1:2] 1684; GFX900-NEXT: ;;#ASMEND 1685; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 1686; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1687; GFX900-NEXT: s_waitcnt vmcnt(0) 1688; GFX900-NEXT: s_setpc_b64 s[30:31] 1689; 1690; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_4: 1691; GFX90A: ; %bb.0: 1692; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1693; GFX90A-NEXT: ;;#ASMSTART 1694; GFX90A-NEXT: ; def v[0:1] 1695; GFX90A-NEXT: ;;#ASMEND 1696; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1697; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1698; GFX90A-NEXT: ;;#ASMSTART 1699; GFX90A-NEXT: ; def v[2:3] 1700; GFX90A-NEXT: ;;#ASMEND 1701; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 1702; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1703; GFX90A-NEXT: s_waitcnt vmcnt(0) 1704; GFX90A-NEXT: s_setpc_b64 s[30:31] 1705; 1706; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_4: 1707; GFX940: ; %bb.0: 1708; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1709; GFX940-NEXT: ;;#ASMSTART 1710; GFX940-NEXT: ; def v[0:1] 1711; GFX940-NEXT: ;;#ASMEND 1712; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1713; GFX940-NEXT: v_mov_b32_e32 v4, 0 1714; GFX940-NEXT: ;;#ASMSTART 1715; GFX940-NEXT: ; def v[2:3] 1716; GFX940-NEXT: ;;#ASMEND 1717; GFX940-NEXT: s_nop 0 1718; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 1719; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1720; GFX940-NEXT: s_waitcnt vmcnt(0) 1721; GFX940-NEXT: s_setpc_b64 s[30:31] 1722 %vec0 = call <4 x half> asm "; def $0", "=v"() 1723 %vec1 = call <4 x half> asm "; def $0", "=v"() 1724 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1725 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1726 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 4> 1727 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 
4 1728 ret void 1729} 1730 1731define void @v_shuffle_v2f16_v3f16__2_4(ptr addrspace(1) inreg %ptr) { 1732; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_4: 1733; GFX900: ; %bb.0: 1734; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1735; GFX900-NEXT: ;;#ASMSTART 1736; GFX900-NEXT: ; def v[0:1] 1737; GFX900-NEXT: ;;#ASMEND 1738; GFX900-NEXT: s_mov_b32 s4, 0xffff 1739; GFX900-NEXT: v_mov_b32_e32 v4, 0 1740; GFX900-NEXT: ;;#ASMSTART 1741; GFX900-NEXT: ; def v[2:3] 1742; GFX900-NEXT: ;;#ASMEND 1743; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 1744; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1745; GFX900-NEXT: s_waitcnt vmcnt(0) 1746; GFX900-NEXT: s_setpc_b64 s[30:31] 1747; 1748; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_4: 1749; GFX90A: ; %bb.0: 1750; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1751; GFX90A-NEXT: ;;#ASMSTART 1752; GFX90A-NEXT: ; def v[0:1] 1753; GFX90A-NEXT: ;;#ASMEND 1754; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1755; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1756; GFX90A-NEXT: ;;#ASMSTART 1757; GFX90A-NEXT: ; def v[2:3] 1758; GFX90A-NEXT: ;;#ASMEND 1759; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 1760; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1761; GFX90A-NEXT: s_waitcnt vmcnt(0) 1762; GFX90A-NEXT: s_setpc_b64 s[30:31] 1763; 1764; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_4: 1765; GFX940: ; %bb.0: 1766; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1767; GFX940-NEXT: ;;#ASMSTART 1768; GFX940-NEXT: ; def v[0:1] 1769; GFX940-NEXT: ;;#ASMEND 1770; GFX940-NEXT: s_mov_b32 s2, 0xffff 1771; GFX940-NEXT: v_mov_b32_e32 v4, 0 1772; GFX940-NEXT: ;;#ASMSTART 1773; GFX940-NEXT: ; def v[2:3] 1774; GFX940-NEXT: ;;#ASMEND 1775; GFX940-NEXT: s_nop 0 1776; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 1777; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1778; GFX940-NEXT: s_waitcnt vmcnt(0) 1779; GFX940-NEXT: s_setpc_b64 s[30:31] 1780 %vec0 = call <4 x half> asm "; def $0", "=v"() 1781 %vec1 = call <4 x half> asm "; def $0", "=v"() 1782 %extract3 = shufflevector <4 x 
half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1783 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1784 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 4> 1785 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1786 ret void 1787} 1788 1789define void @v_shuffle_v2f16_v3f16__3_4(ptr addrspace(1) inreg %ptr) { 1790; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_4: 1791; GFX900: ; %bb.0: 1792; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1793; GFX900-NEXT: v_mov_b32_e32 v2, 0 1794; GFX900-NEXT: ;;#ASMSTART 1795; GFX900-NEXT: ; def v[0:1] 1796; GFX900-NEXT: ;;#ASMEND 1797; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1798; GFX900-NEXT: s_waitcnt vmcnt(0) 1799; GFX900-NEXT: s_setpc_b64 s[30:31] 1800; 1801; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_4: 1802; GFX90A: ; %bb.0: 1803; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1804; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1805; GFX90A-NEXT: ;;#ASMSTART 1806; GFX90A-NEXT: ; def v[0:1] 1807; GFX90A-NEXT: ;;#ASMEND 1808; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1809; GFX90A-NEXT: s_waitcnt vmcnt(0) 1810; GFX90A-NEXT: s_setpc_b64 s[30:31] 1811; 1812; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_4: 1813; GFX940: ; %bb.0: 1814; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1815; GFX940-NEXT: v_mov_b32_e32 v2, 0 1816; GFX940-NEXT: ;;#ASMSTART 1817; GFX940-NEXT: ; def v[0:1] 1818; GFX940-NEXT: ;;#ASMEND 1819; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1820; GFX940-NEXT: s_waitcnt vmcnt(0) 1821; GFX940-NEXT: s_setpc_b64 s[30:31] 1822 %vec0 = call <4 x half> asm "; def $0", "=v"() 1823 %vec1 = call <4 x half> asm "; def $0", "=v"() 1824 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1825 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1826 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, 
<2 x i32> <i32 3, i32 4> 1827 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1828 ret void 1829} 1830 1831define void @v_shuffle_v2f16_v3f16__4_4(ptr addrspace(1) inreg %ptr) { 1832; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_4: 1833; GFX900: ; %bb.0: 1834; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1835; GFX900-NEXT: ;;#ASMSTART 1836; GFX900-NEXT: ; def v[0:1] 1837; GFX900-NEXT: ;;#ASMEND 1838; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1839; GFX900-NEXT: v_mov_b32_e32 v2, 0 1840; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 1841; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1842; GFX900-NEXT: s_waitcnt vmcnt(0) 1843; GFX900-NEXT: s_setpc_b64 s[30:31] 1844; 1845; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_4: 1846; GFX90A: ; %bb.0: 1847; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1848; GFX90A-NEXT: ;;#ASMSTART 1849; GFX90A-NEXT: ; def v[0:1] 1850; GFX90A-NEXT: ;;#ASMEND 1851; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1852; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1853; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 1854; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1855; GFX90A-NEXT: s_waitcnt vmcnt(0) 1856; GFX90A-NEXT: s_setpc_b64 s[30:31] 1857; 1858; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_4: 1859; GFX940: ; %bb.0: 1860; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1861; GFX940-NEXT: ;;#ASMSTART 1862; GFX940-NEXT: ; def v[0:1] 1863; GFX940-NEXT: ;;#ASMEND 1864; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1865; GFX940-NEXT: v_mov_b32_e32 v2, 0 1866; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 1867; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1868; GFX940-NEXT: s_waitcnt vmcnt(0) 1869; GFX940-NEXT: s_setpc_b64 s[30:31] 1870 %vec0 = call <4 x half> asm "; def $0", "=v"() 1871 %vec1 = call <4 x half> asm "; def $0", "=v"() 1872 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1873 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1874 %shuf = shufflevector <3 x half> 
%extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 4> 1875 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1876 ret void 1877} 1878 1879define void @v_shuffle_v2f16_v3f16__u_5(ptr addrspace(1) inreg %ptr) { 1880; GFX900-LABEL: v_shuffle_v2f16_v3f16__u_5: 1881; GFX900: ; %bb.0: 1882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1883; GFX900-NEXT: ;;#ASMSTART 1884; GFX900-NEXT: ; def v[0:1] 1885; GFX900-NEXT: ;;#ASMEND 1886; GFX900-NEXT: v_mov_b32_e32 v2, 0 1887; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1888; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1889; GFX900-NEXT: s_waitcnt vmcnt(0) 1890; GFX900-NEXT: s_setpc_b64 s[30:31] 1891; 1892; GFX90A-LABEL: v_shuffle_v2f16_v3f16__u_5: 1893; GFX90A: ; %bb.0: 1894; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1895; GFX90A-NEXT: ;;#ASMSTART 1896; GFX90A-NEXT: ; def v[0:1] 1897; GFX90A-NEXT: ;;#ASMEND 1898; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1899; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1900; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1901; GFX90A-NEXT: s_waitcnt vmcnt(0) 1902; GFX90A-NEXT: s_setpc_b64 s[30:31] 1903; 1904; GFX940-LABEL: v_shuffle_v2f16_v3f16__u_5: 1905; GFX940: ; %bb.0: 1906; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1907; GFX940-NEXT: ;;#ASMSTART 1908; GFX940-NEXT: ; def v[0:1] 1909; GFX940-NEXT: ;;#ASMEND 1910; GFX940-NEXT: v_mov_b32_e32 v2, 0 1911; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1912; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1913; GFX940-NEXT: s_waitcnt vmcnt(0) 1914; GFX940-NEXT: s_setpc_b64 s[30:31] 1915 %vec0 = call <4 x half> asm "; def $0", "=v"() 1916 %vec1 = call <4 x half> asm "; def $0", "=v"() 1917 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1918 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1919 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 5> 1920 store <2 x half> %shuf, 
ptr addrspace(1) %ptr, align 4 1921 ret void 1922} 1923 1924define void @v_shuffle_v2f16_v3f16__0_5(ptr addrspace(1) inreg %ptr) { 1925; GFX900-LABEL: v_shuffle_v2f16_v3f16__0_5: 1926; GFX900: ; %bb.0: 1927; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1928; GFX900-NEXT: ;;#ASMSTART 1929; GFX900-NEXT: ; def v[0:1] 1930; GFX900-NEXT: ;;#ASMEND 1931; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1932; GFX900-NEXT: v_mov_b32_e32 v3, 0 1933; GFX900-NEXT: ;;#ASMSTART 1934; GFX900-NEXT: ; def v[1:2] 1935; GFX900-NEXT: ;;#ASMEND 1936; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 1937; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1938; GFX900-NEXT: s_waitcnt vmcnt(0) 1939; GFX900-NEXT: s_setpc_b64 s[30:31] 1940; 1941; GFX90A-LABEL: v_shuffle_v2f16_v3f16__0_5: 1942; GFX90A: ; %bb.0: 1943; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1944; GFX90A-NEXT: ;;#ASMSTART 1945; GFX90A-NEXT: ; def v[0:1] 1946; GFX90A-NEXT: ;;#ASMEND 1947; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1948; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1949; GFX90A-NEXT: ;;#ASMSTART 1950; GFX90A-NEXT: ; def v[2:3] 1951; GFX90A-NEXT: ;;#ASMEND 1952; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 1953; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1954; GFX90A-NEXT: s_waitcnt vmcnt(0) 1955; GFX90A-NEXT: s_setpc_b64 s[30:31] 1956; 1957; GFX940-LABEL: v_shuffle_v2f16_v3f16__0_5: 1958; GFX940: ; %bb.0: 1959; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1960; GFX940-NEXT: ;;#ASMSTART 1961; GFX940-NEXT: ; def v[0:1] 1962; GFX940-NEXT: ;;#ASMEND 1963; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1964; GFX940-NEXT: v_mov_b32_e32 v4, 0 1965; GFX940-NEXT: ;;#ASMSTART 1966; GFX940-NEXT: ; def v[2:3] 1967; GFX940-NEXT: ;;#ASMEND 1968; GFX940-NEXT: s_nop 0 1969; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 1970; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1971; GFX940-NEXT: s_waitcnt vmcnt(0) 1972; GFX940-NEXT: s_setpc_b64 s[30:31] 1973 %vec0 = call <4 x half> asm "; def $0", "=v"() 1974 %vec1 = call <4 x half> asm "; def $0", 
"=v"() 1975 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1976 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 1977 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 5> 1978 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 1979 ret void 1980} 1981 1982define void @v_shuffle_v2f16_v3f16__1_5(ptr addrspace(1) inreg %ptr) { 1983; GFX900-LABEL: v_shuffle_v2f16_v3f16__1_5: 1984; GFX900: ; %bb.0: 1985; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1986; GFX900-NEXT: ;;#ASMSTART 1987; GFX900-NEXT: ; def v[0:1] 1988; GFX900-NEXT: ;;#ASMEND 1989; GFX900-NEXT: v_mov_b32_e32 v3, 0 1990; GFX900-NEXT: ;;#ASMSTART 1991; GFX900-NEXT: ; def v[1:2] 1992; GFX900-NEXT: ;;#ASMEND 1993; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 1994; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1995; GFX900-NEXT: s_waitcnt vmcnt(0) 1996; GFX900-NEXT: s_setpc_b64 s[30:31] 1997; 1998; GFX90A-LABEL: v_shuffle_v2f16_v3f16__1_5: 1999; GFX90A: ; %bb.0: 2000; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2001; GFX90A-NEXT: ;;#ASMSTART 2002; GFX90A-NEXT: ; def v[0:1] 2003; GFX90A-NEXT: ;;#ASMEND 2004; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2005; GFX90A-NEXT: ;;#ASMSTART 2006; GFX90A-NEXT: ; def v[2:3] 2007; GFX90A-NEXT: ;;#ASMEND 2008; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 2009; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2010; GFX90A-NEXT: s_waitcnt vmcnt(0) 2011; GFX90A-NEXT: s_setpc_b64 s[30:31] 2012; 2013; GFX940-LABEL: v_shuffle_v2f16_v3f16__1_5: 2014; GFX940: ; %bb.0: 2015; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2016; GFX940-NEXT: ;;#ASMSTART 2017; GFX940-NEXT: ; def v[0:1] 2018; GFX940-NEXT: ;;#ASMEND 2019; GFX940-NEXT: v_mov_b32_e32 v4, 0 2020; GFX940-NEXT: ;;#ASMSTART 2021; GFX940-NEXT: ; def v[2:3] 2022; GFX940-NEXT: ;;#ASMEND 2023; GFX940-NEXT: s_nop 0 2024; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 2025; GFX940-NEXT: 
global_store_dword v4, v0, s[0:1] sc0 sc1 2026; GFX940-NEXT: s_waitcnt vmcnt(0) 2027; GFX940-NEXT: s_setpc_b64 s[30:31] 2028 %vec0 = call <4 x half> asm "; def $0", "=v"() 2029 %vec1 = call <4 x half> asm "; def $0", "=v"() 2030 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2031 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2032 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 5> 2033 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 2034 ret void 2035} 2036 2037define void @v_shuffle_v2f16_v3f16__2_5(ptr addrspace(1) inreg %ptr) { 2038; GFX900-LABEL: v_shuffle_v2f16_v3f16__2_5: 2039; GFX900: ; %bb.0: 2040; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2041; GFX900-NEXT: ;;#ASMSTART 2042; GFX900-NEXT: ; def v[0:1] 2043; GFX900-NEXT: ;;#ASMEND 2044; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2045; GFX900-NEXT: v_mov_b32_e32 v4, 0 2046; GFX900-NEXT: ;;#ASMSTART 2047; GFX900-NEXT: ; def v[2:3] 2048; GFX900-NEXT: ;;#ASMEND 2049; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 2050; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2051; GFX900-NEXT: s_waitcnt vmcnt(0) 2052; GFX900-NEXT: s_setpc_b64 s[30:31] 2053; 2054; GFX90A-LABEL: v_shuffle_v2f16_v3f16__2_5: 2055; GFX90A: ; %bb.0: 2056; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2057; GFX90A-NEXT: ;;#ASMSTART 2058; GFX90A-NEXT: ; def v[0:1] 2059; GFX90A-NEXT: ;;#ASMEND 2060; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2061; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2062; GFX90A-NEXT: ;;#ASMSTART 2063; GFX90A-NEXT: ; def v[2:3] 2064; GFX90A-NEXT: ;;#ASMEND 2065; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 2066; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2067; GFX90A-NEXT: s_waitcnt vmcnt(0) 2068; GFX90A-NEXT: s_setpc_b64 s[30:31] 2069; 2070; GFX940-LABEL: v_shuffle_v2f16_v3f16__2_5: 2071; GFX940: ; %bb.0: 2072; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2073; GFX940-NEXT: 
;;#ASMSTART 2074; GFX940-NEXT: ; def v[0:1] 2075; GFX940-NEXT: ;;#ASMEND 2076; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2077; GFX940-NEXT: v_mov_b32_e32 v4, 0 2078; GFX940-NEXT: ;;#ASMSTART 2079; GFX940-NEXT: ; def v[2:3] 2080; GFX940-NEXT: ;;#ASMEND 2081; GFX940-NEXT: s_nop 0 2082; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 2083; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2084; GFX940-NEXT: s_waitcnt vmcnt(0) 2085; GFX940-NEXT: s_setpc_b64 s[30:31] 2086 %vec0 = call <4 x half> asm "; def $0", "=v"() 2087 %vec1 = call <4 x half> asm "; def $0", "=v"() 2088 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2089 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2090 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 5> 2091 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 2092 ret void 2093} 2094 2095define void @v_shuffle_v2f16_v3f16__3_5(ptr addrspace(1) inreg %ptr) { 2096; GFX900-LABEL: v_shuffle_v2f16_v3f16__3_5: 2097; GFX900: ; %bb.0: 2098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2099; GFX900-NEXT: ;;#ASMSTART 2100; GFX900-NEXT: ; def v[0:1] 2101; GFX900-NEXT: ;;#ASMEND 2102; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2103; GFX900-NEXT: v_mov_b32_e32 v2, 0 2104; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 2105; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2106; GFX900-NEXT: s_waitcnt vmcnt(0) 2107; GFX900-NEXT: s_setpc_b64 s[30:31] 2108; 2109; GFX90A-LABEL: v_shuffle_v2f16_v3f16__3_5: 2110; GFX90A: ; %bb.0: 2111; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2112; GFX90A-NEXT: ;;#ASMSTART 2113; GFX90A-NEXT: ; def v[0:1] 2114; GFX90A-NEXT: ;;#ASMEND 2115; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2116; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2117; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 2118; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2119; GFX90A-NEXT: s_waitcnt vmcnt(0) 2120; GFX90A-NEXT: s_setpc_b64 s[30:31] 2121; 
2122; GFX940-LABEL: v_shuffle_v2f16_v3f16__3_5: 2123; GFX940: ; %bb.0: 2124; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2125; GFX940-NEXT: ;;#ASMSTART 2126; GFX940-NEXT: ; def v[0:1] 2127; GFX940-NEXT: ;;#ASMEND 2128; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2129; GFX940-NEXT: v_mov_b32_e32 v2, 0 2130; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 2131; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2132; GFX940-NEXT: s_waitcnt vmcnt(0) 2133; GFX940-NEXT: s_setpc_b64 s[30:31] 2134 %vec0 = call <4 x half> asm "; def $0", "=v"() 2135 %vec1 = call <4 x half> asm "; def $0", "=v"() 2136 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2137 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2138 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 5> 2139 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 2140 ret void 2141} 2142 2143define void @v_shuffle_v2f16_v3f16__4_5(ptr addrspace(1) inreg %ptr) { 2144; GFX900-LABEL: v_shuffle_v2f16_v3f16__4_5: 2145; GFX900: ; %bb.0: 2146; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2147; GFX900-NEXT: ;;#ASMSTART 2148; GFX900-NEXT: ; def v[0:1] 2149; GFX900-NEXT: ;;#ASMEND 2150; GFX900-NEXT: v_mov_b32_e32 v2, 0 2151; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 2152; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2153; GFX900-NEXT: s_waitcnt vmcnt(0) 2154; GFX900-NEXT: s_setpc_b64 s[30:31] 2155; 2156; GFX90A-LABEL: v_shuffle_v2f16_v3f16__4_5: 2157; GFX90A: ; %bb.0: 2158; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2159; GFX90A-NEXT: ;;#ASMSTART 2160; GFX90A-NEXT: ; def v[0:1] 2161; GFX90A-NEXT: ;;#ASMEND 2162; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2163; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 2164; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2165; GFX90A-NEXT: s_waitcnt vmcnt(0) 2166; GFX90A-NEXT: s_setpc_b64 s[30:31] 2167; 2168; GFX940-LABEL: v_shuffle_v2f16_v3f16__4_5: 
2169; GFX940: ; %bb.0: 2170; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2171; GFX940-NEXT: ;;#ASMSTART 2172; GFX940-NEXT: ; def v[0:1] 2173; GFX940-NEXT: ;;#ASMEND 2174; GFX940-NEXT: v_mov_b32_e32 v2, 0 2175; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 2176; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2177; GFX940-NEXT: s_waitcnt vmcnt(0) 2178; GFX940-NEXT: s_setpc_b64 s[30:31] 2179 %vec0 = call <4 x half> asm "; def $0", "=v"() 2180 %vec1 = call <4 x half> asm "; def $0", "=v"() 2181 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2182 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2183 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 5> 2184 store <2 x half> %shuf, ptr addrspace(1) %ptr, align 4 2185 ret void 2186} 2187 2188define void @s_shuffle_v2f16_v3f16__u_u() { 2189; GFX9-LABEL: s_shuffle_v2f16_v3f16__u_u: 2190; GFX9: ; %bb.0: 2191; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2192; GFX9-NEXT: ;;#ASMSTART 2193; GFX9-NEXT: ; use s8 2194; GFX9-NEXT: ;;#ASMEND 2195; GFX9-NEXT: s_setpc_b64 s[30:31] 2196 %vec0 = call <4 x half> asm "; def $0", "=s"() 2197 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2198 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> poison 2199 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2200 ret void 2201} 2202 2203define void @s_shuffle_v2f16_v3f16__0_u() { 2204; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_u: 2205; GFX900: ; %bb.0: 2206; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2207; GFX900-NEXT: ;;#ASMSTART 2208; GFX900-NEXT: ; def s[8:9] 2209; GFX900-NEXT: ;;#ASMEND 2210; GFX900-NEXT: ;;#ASMSTART 2211; GFX900-NEXT: ; use s8 2212; GFX900-NEXT: ;;#ASMEND 2213; GFX900-NEXT: s_setpc_b64 s[30:31] 2214; 2215; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_u: 2216; GFX90A: ; %bb.0: 2217; 
GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2218; GFX90A-NEXT: ;;#ASMSTART 2219; GFX90A-NEXT: ; def s[8:9] 2220; GFX90A-NEXT: ;;#ASMEND 2221; GFX90A-NEXT: ;;#ASMSTART 2222; GFX90A-NEXT: ; use s8 2223; GFX90A-NEXT: ;;#ASMEND 2224; GFX90A-NEXT: s_setpc_b64 s[30:31] 2225; 2226; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_u: 2227; GFX940: ; %bb.0: 2228; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX940-NEXT: ;;#ASMSTART 2230; GFX940-NEXT: ; def s[8:9] 2231; GFX940-NEXT: ;;#ASMEND 2232; GFX940-NEXT: s_nop 0 2233; GFX940-NEXT: ;;#ASMSTART 2234; GFX940-NEXT: ; use s8 2235; GFX940-NEXT: ;;#ASMEND 2236; GFX940-NEXT: s_setpc_b64 s[30:31] 2237 %vec0 = call <4 x half> asm "; def $0", "=s"() 2238 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2239 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 poison> 2240 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2241 ret void 2242} 2243 2244define void @s_shuffle_v2f16_v3f16__1_u() { 2245; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_u: 2246; GFX900: ; %bb.0: 2247; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2248; GFX900-NEXT: ;;#ASMSTART 2249; GFX900-NEXT: ; def s[4:5] 2250; GFX900-NEXT: ;;#ASMEND 2251; GFX900-NEXT: s_lshr_b32 s8, s4, 16 2252; GFX900-NEXT: ;;#ASMSTART 2253; GFX900-NEXT: ; use s8 2254; GFX900-NEXT: ;;#ASMEND 2255; GFX900-NEXT: s_setpc_b64 s[30:31] 2256; 2257; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_u: 2258; GFX90A: ; %bb.0: 2259; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2260; GFX90A-NEXT: ;;#ASMSTART 2261; GFX90A-NEXT: ; def s[4:5] 2262; GFX90A-NEXT: ;;#ASMEND 2263; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 2264; GFX90A-NEXT: ;;#ASMSTART 2265; GFX90A-NEXT: ; use s8 2266; GFX90A-NEXT: ;;#ASMEND 2267; GFX90A-NEXT: s_setpc_b64 s[30:31] 2268; 2269; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_u: 2270; GFX940: ; %bb.0: 2271; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2272; GFX940-NEXT: 
;;#ASMSTART 2273; GFX940-NEXT: ; def s[0:1] 2274; GFX940-NEXT: ;;#ASMEND 2275; GFX940-NEXT: s_lshr_b32 s8, s0, 16 2276; GFX940-NEXT: ;;#ASMSTART 2277; GFX940-NEXT: ; use s8 2278; GFX940-NEXT: ;;#ASMEND 2279; GFX940-NEXT: s_setpc_b64 s[30:31] 2280 %vec0 = call <4 x half> asm "; def $0", "=s"() 2281 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2282 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 poison> 2283 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2284 ret void 2285} 2286 2287define void @s_shuffle_v2f16_v3f16__2_u() { 2288; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_u: 2289; GFX900: ; %bb.0: 2290; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2291; GFX900-NEXT: ;;#ASMSTART 2292; GFX900-NEXT: ; def s[4:5] 2293; GFX900-NEXT: ;;#ASMEND 2294; GFX900-NEXT: s_mov_b32 s8, s5 2295; GFX900-NEXT: ;;#ASMSTART 2296; GFX900-NEXT: ; use s8 2297; GFX900-NEXT: ;;#ASMEND 2298; GFX900-NEXT: s_setpc_b64 s[30:31] 2299; 2300; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_u: 2301; GFX90A: ; %bb.0: 2302; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2303; GFX90A-NEXT: ;;#ASMSTART 2304; GFX90A-NEXT: ; def s[4:5] 2305; GFX90A-NEXT: ;;#ASMEND 2306; GFX90A-NEXT: s_mov_b32 s8, s5 2307; GFX90A-NEXT: ;;#ASMSTART 2308; GFX90A-NEXT: ; use s8 2309; GFX90A-NEXT: ;;#ASMEND 2310; GFX90A-NEXT: s_setpc_b64 s[30:31] 2311; 2312; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_u: 2313; GFX940: ; %bb.0: 2314; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2315; GFX940-NEXT: ;;#ASMSTART 2316; GFX940-NEXT: ; def s[0:1] 2317; GFX940-NEXT: ;;#ASMEND 2318; GFX940-NEXT: s_mov_b32 s8, s1 2319; GFX940-NEXT: ;;#ASMSTART 2320; GFX940-NEXT: ; use s8 2321; GFX940-NEXT: ;;#ASMEND 2322; GFX940-NEXT: s_setpc_b64 s[30:31] 2323 %vec0 = call <4 x half> asm "; def $0", "=s"() 2324 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2325 %shuf = shufflevector <3 x half> 
%extract3, <3 x half> poison, <2 x i32> <i32 2, i32 poison> 2326 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2327 ret void 2328} 2329 2330define void @s_shuffle_v2f16_v3f16__3_u() { 2331; GFX9-LABEL: s_shuffle_v2f16_v3f16__3_u: 2332; GFX9: ; %bb.0: 2333; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2334; GFX9-NEXT: ;;#ASMSTART 2335; GFX9-NEXT: ; use s8 2336; GFX9-NEXT: ;;#ASMEND 2337; GFX9-NEXT: s_setpc_b64 s[30:31] 2338 %vec0 = call <4 x half> asm "; def $0", "=s"() 2339 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2340 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 poison> 2341 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2342 ret void 2343} 2344 2345define void @s_shuffle_v2f16_v3f16__4_u() { 2346; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_u: 2347; GFX900: ; %bb.0: 2348; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2349; GFX900-NEXT: ;;#ASMSTART 2350; GFX900-NEXT: ; def s[4:5] 2351; GFX900-NEXT: ;;#ASMEND 2352; GFX900-NEXT: s_lshr_b32 s8, s4, 16 2353; GFX900-NEXT: ;;#ASMSTART 2354; GFX900-NEXT: ; use s8 2355; GFX900-NEXT: ;;#ASMEND 2356; GFX900-NEXT: s_setpc_b64 s[30:31] 2357; 2358; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_u: 2359; GFX90A: ; %bb.0: 2360; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2361; GFX90A-NEXT: ;;#ASMSTART 2362; GFX90A-NEXT: ; def s[4:5] 2363; GFX90A-NEXT: ;;#ASMEND 2364; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 2365; GFX90A-NEXT: ;;#ASMSTART 2366; GFX90A-NEXT: ; use s8 2367; GFX90A-NEXT: ;;#ASMEND 2368; GFX90A-NEXT: s_setpc_b64 s[30:31] 2369; 2370; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_u: 2371; GFX940: ; %bb.0: 2372; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2373; GFX940-NEXT: ;;#ASMSTART 2374; GFX940-NEXT: ; def s[0:1] 2375; GFX940-NEXT: ;;#ASMEND 2376; GFX940-NEXT: s_lshr_b32 s8, s0, 16 2377; GFX940-NEXT: ;;#ASMSTART 2378; GFX940-NEXT: ; use s8 2379; GFX940-NEXT: ;;#ASMEND 2380; GFX940-NEXT: 
s_setpc_b64 s[30:31] 2381 %vec0 = call <4 x half> asm "; def $0", "=s"() 2382 %vec1 = call <4 x half> asm "; def $0", "=s"() 2383 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2384 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2385 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 poison> 2386 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2387 ret void 2388} 2389 2390define void @s_shuffle_v2f16_v3f16__5_u() { 2391; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_u: 2392; GFX900: ; %bb.0: 2393; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2394; GFX900-NEXT: ;;#ASMSTART 2395; GFX900-NEXT: ; def s[4:5] 2396; GFX900-NEXT: ;;#ASMEND 2397; GFX900-NEXT: s_mov_b32 s8, s5 2398; GFX900-NEXT: ;;#ASMSTART 2399; GFX900-NEXT: ; use s8 2400; GFX900-NEXT: ;;#ASMEND 2401; GFX900-NEXT: s_setpc_b64 s[30:31] 2402; 2403; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_u: 2404; GFX90A: ; %bb.0: 2405; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2406; GFX90A-NEXT: ;;#ASMSTART 2407; GFX90A-NEXT: ; def s[4:5] 2408; GFX90A-NEXT: ;;#ASMEND 2409; GFX90A-NEXT: s_mov_b32 s8, s5 2410; GFX90A-NEXT: ;;#ASMSTART 2411; GFX90A-NEXT: ; use s8 2412; GFX90A-NEXT: ;;#ASMEND 2413; GFX90A-NEXT: s_setpc_b64 s[30:31] 2414; 2415; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_u: 2416; GFX940: ; %bb.0: 2417; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2418; GFX940-NEXT: ;;#ASMSTART 2419; GFX940-NEXT: ; def s[0:1] 2420; GFX940-NEXT: ;;#ASMEND 2421; GFX940-NEXT: s_mov_b32 s8, s1 2422; GFX940-NEXT: ;;#ASMSTART 2423; GFX940-NEXT: ; use s8 2424; GFX940-NEXT: ;;#ASMEND 2425; GFX940-NEXT: s_setpc_b64 s[30:31] 2426 %vec0 = call <4 x half> asm "; def $0", "=s"() 2427 %vec1 = call <4 x half> asm "; def $0", "=s"() 2428 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2429 %extract31 = shufflevector <4 x half> %vec1, <4 x 
half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2430 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 poison> 2431 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2432 ret void 2433} 2434 2435define void @s_shuffle_v2f16_v3f16__5_0() { 2436; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_0: 2437; GFX900: ; %bb.0: 2438; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2439; GFX900-NEXT: ;;#ASMSTART 2440; GFX900-NEXT: ; def s[4:5] 2441; GFX900-NEXT: ;;#ASMEND 2442; GFX900-NEXT: ;;#ASMSTART 2443; GFX900-NEXT: ; def s[6:7] 2444; GFX900-NEXT: ;;#ASMEND 2445; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 2446; GFX900-NEXT: ;;#ASMSTART 2447; GFX900-NEXT: ; use s8 2448; GFX900-NEXT: ;;#ASMEND 2449; GFX900-NEXT: s_setpc_b64 s[30:31] 2450; 2451; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_0: 2452; GFX90A: ; %bb.0: 2453; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2454; GFX90A-NEXT: ;;#ASMSTART 2455; GFX90A-NEXT: ; def s[4:5] 2456; GFX90A-NEXT: ;;#ASMEND 2457; GFX90A-NEXT: ;;#ASMSTART 2458; GFX90A-NEXT: ; def s[6:7] 2459; GFX90A-NEXT: ;;#ASMEND 2460; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 2461; GFX90A-NEXT: ;;#ASMSTART 2462; GFX90A-NEXT: ; use s8 2463; GFX90A-NEXT: ;;#ASMEND 2464; GFX90A-NEXT: s_setpc_b64 s[30:31] 2465; 2466; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_0: 2467; GFX940: ; %bb.0: 2468; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2469; GFX940-NEXT: ;;#ASMSTART 2470; GFX940-NEXT: ; def s[0:1] 2471; GFX940-NEXT: ;;#ASMEND 2472; GFX940-NEXT: ;;#ASMSTART 2473; GFX940-NEXT: ; def s[2:3] 2474; GFX940-NEXT: ;;#ASMEND 2475; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 2476; GFX940-NEXT: ;;#ASMSTART 2477; GFX940-NEXT: ; use s8 2478; GFX940-NEXT: ;;#ASMEND 2479; GFX940-NEXT: s_setpc_b64 s[30:31] 2480 %vec0 = call <4 x half> asm "; def $0", "=s"() 2481 %vec1 = call <4 x half> asm "; def $0", "=s"() 2482 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2483 %extract31 = 
shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2484 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 0> 2485 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2486 ret void 2487} 2488 2489define void @s_shuffle_v2f16_v3f16__5_1() { 2490; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_1: 2491; GFX900: ; %bb.0: 2492; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2493; GFX900-NEXT: ;;#ASMSTART 2494; GFX900-NEXT: ; def s[4:5] 2495; GFX900-NEXT: ;;#ASMEND 2496; GFX900-NEXT: s_lshr_b32 s4, s4, 16 2497; GFX900-NEXT: ;;#ASMSTART 2498; GFX900-NEXT: ; def s[6:7] 2499; GFX900-NEXT: ;;#ASMEND 2500; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 2501; GFX900-NEXT: ;;#ASMSTART 2502; GFX900-NEXT: ; use s8 2503; GFX900-NEXT: ;;#ASMEND 2504; GFX900-NEXT: s_setpc_b64 s[30:31] 2505; 2506; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_1: 2507; GFX90A: ; %bb.0: 2508; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2509; GFX90A-NEXT: ;;#ASMSTART 2510; GFX90A-NEXT: ; def s[4:5] 2511; GFX90A-NEXT: ;;#ASMEND 2512; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 2513; GFX90A-NEXT: ;;#ASMSTART 2514; GFX90A-NEXT: ; def s[6:7] 2515; GFX90A-NEXT: ;;#ASMEND 2516; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 2517; GFX90A-NEXT: ;;#ASMSTART 2518; GFX90A-NEXT: ; use s8 2519; GFX90A-NEXT: ;;#ASMEND 2520; GFX90A-NEXT: s_setpc_b64 s[30:31] 2521; 2522; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_1: 2523; GFX940: ; %bb.0: 2524; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2525; GFX940-NEXT: ;;#ASMSTART 2526; GFX940-NEXT: ; def s[0:1] 2527; GFX940-NEXT: ;;#ASMEND 2528; GFX940-NEXT: s_lshr_b32 s0, s0, 16 2529; GFX940-NEXT: ;;#ASMSTART 2530; GFX940-NEXT: ; def s[2:3] 2531; GFX940-NEXT: ;;#ASMEND 2532; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 2533; GFX940-NEXT: ;;#ASMSTART 2534; GFX940-NEXT: ; use s8 2535; GFX940-NEXT: ;;#ASMEND 2536; GFX940-NEXT: s_setpc_b64 s[30:31] 2537 %vec0 = call <4 x half> asm "; def $0", "=s"() 2538 %vec1 = call <4 x 
half> asm "; def $0", "=s"() 2539 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2540 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2541 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 1> 2542 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2543 ret void 2544} 2545 2546define void @s_shuffle_v2f16_v3f16__5_2() { 2547; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_2: 2548; GFX900: ; %bb.0: 2549; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2550; GFX900-NEXT: ;;#ASMSTART 2551; GFX900-NEXT: ; def s[4:5] 2552; GFX900-NEXT: ;;#ASMEND 2553; GFX900-NEXT: ;;#ASMSTART 2554; GFX900-NEXT: ; def s[6:7] 2555; GFX900-NEXT: ;;#ASMEND 2556; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 2557; GFX900-NEXT: ;;#ASMSTART 2558; GFX900-NEXT: ; use s8 2559; GFX900-NEXT: ;;#ASMEND 2560; GFX900-NEXT: s_setpc_b64 s[30:31] 2561; 2562; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_2: 2563; GFX90A: ; %bb.0: 2564; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2565; GFX90A-NEXT: ;;#ASMSTART 2566; GFX90A-NEXT: ; def s[4:5] 2567; GFX90A-NEXT: ;;#ASMEND 2568; GFX90A-NEXT: ;;#ASMSTART 2569; GFX90A-NEXT: ; def s[6:7] 2570; GFX90A-NEXT: ;;#ASMEND 2571; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 2572; GFX90A-NEXT: ;;#ASMSTART 2573; GFX90A-NEXT: ; use s8 2574; GFX90A-NEXT: ;;#ASMEND 2575; GFX90A-NEXT: s_setpc_b64 s[30:31] 2576; 2577; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_2: 2578; GFX940: ; %bb.0: 2579; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2580; GFX940-NEXT: ;;#ASMSTART 2581; GFX940-NEXT: ; def s[0:1] 2582; GFX940-NEXT: ;;#ASMEND 2583; GFX940-NEXT: ;;#ASMSTART 2584; GFX940-NEXT: ; def s[2:3] 2585; GFX940-NEXT: ;;#ASMEND 2586; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 2587; GFX940-NEXT: ;;#ASMSTART 2588; GFX940-NEXT: ; use s8 2589; GFX940-NEXT: ;;#ASMEND 2590; GFX940-NEXT: s_setpc_b64 s[30:31] 2591 %vec0 = call <4 x half> asm "; def $0", 
"=s"() 2592 %vec1 = call <4 x half> asm "; def $0", "=s"() 2593 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2594 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2595 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 2> 2596 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2597 ret void 2598} 2599 2600define void @s_shuffle_v2f16_v3f16__5_3() { 2601; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_3: 2602; GFX900: ; %bb.0: 2603; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2604; GFX900-NEXT: ;;#ASMSTART 2605; GFX900-NEXT: ; def s[4:5] 2606; GFX900-NEXT: ;;#ASMEND 2607; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2608; GFX900-NEXT: ;;#ASMSTART 2609; GFX900-NEXT: ; use s8 2610; GFX900-NEXT: ;;#ASMEND 2611; GFX900-NEXT: s_setpc_b64 s[30:31] 2612; 2613; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_3: 2614; GFX90A: ; %bb.0: 2615; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2616; GFX90A-NEXT: ;;#ASMSTART 2617; GFX90A-NEXT: ; def s[4:5] 2618; GFX90A-NEXT: ;;#ASMEND 2619; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2620; GFX90A-NEXT: ;;#ASMSTART 2621; GFX90A-NEXT: ; use s8 2622; GFX90A-NEXT: ;;#ASMEND 2623; GFX90A-NEXT: s_setpc_b64 s[30:31] 2624; 2625; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_3: 2626; GFX940: ; %bb.0: 2627; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2628; GFX940-NEXT: ;;#ASMSTART 2629; GFX940-NEXT: ; def s[0:1] 2630; GFX940-NEXT: ;;#ASMEND 2631; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 2632; GFX940-NEXT: ;;#ASMSTART 2633; GFX940-NEXT: ; use s8 2634; GFX940-NEXT: ;;#ASMEND 2635; GFX940-NEXT: s_setpc_b64 s[30:31] 2636 %vec0 = call <4 x half> asm "; def $0", "=s"() 2637 %vec1 = call <4 x half> asm "; def $0", "=s"() 2638 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2639 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 
1, i32 2> 2640 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 3> 2641 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2642 ret void 2643} 2644 2645define void @s_shuffle_v2f16_v3f16__5_4() { 2646; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_4: 2647; GFX900: ; %bb.0: 2648; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2649; GFX900-NEXT: ;;#ASMSTART 2650; GFX900-NEXT: ; def s[4:5] 2651; GFX900-NEXT: ;;#ASMEND 2652; GFX900-NEXT: s_lshr_b32 s4, s4, 16 2653; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2654; GFX900-NEXT: ;;#ASMSTART 2655; GFX900-NEXT: ; use s8 2656; GFX900-NEXT: ;;#ASMEND 2657; GFX900-NEXT: s_setpc_b64 s[30:31] 2658; 2659; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_4: 2660; GFX90A: ; %bb.0: 2661; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2662; GFX90A-NEXT: ;;#ASMSTART 2663; GFX90A-NEXT: ; def s[4:5] 2664; GFX90A-NEXT: ;;#ASMEND 2665; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 2666; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2667; GFX90A-NEXT: ;;#ASMSTART 2668; GFX90A-NEXT: ; use s8 2669; GFX90A-NEXT: ;;#ASMEND 2670; GFX90A-NEXT: s_setpc_b64 s[30:31] 2671; 2672; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_4: 2673; GFX940: ; %bb.0: 2674; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2675; GFX940-NEXT: ;;#ASMSTART 2676; GFX940-NEXT: ; def s[0:1] 2677; GFX940-NEXT: ;;#ASMEND 2678; GFX940-NEXT: s_lshr_b32 s0, s0, 16 2679; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 2680; GFX940-NEXT: ;;#ASMSTART 2681; GFX940-NEXT: ; use s8 2682; GFX940-NEXT: ;;#ASMEND 2683; GFX940-NEXT: s_setpc_b64 s[30:31] 2684 %vec0 = call <4 x half> asm "; def $0", "=s"() 2685 %vec1 = call <4 x half> asm "; def $0", "=s"() 2686 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2687 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2688 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 4> 2689 call void asm 
sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2690 ret void 2691} 2692 2693define void @s_shuffle_v2f16_v3f16__5_5() { 2694; GFX900-LABEL: s_shuffle_v2f16_v3f16__5_5: 2695; GFX900: ; %bb.0: 2696; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2697; GFX900-NEXT: ;;#ASMSTART 2698; GFX900-NEXT: ; def s[4:5] 2699; GFX900-NEXT: ;;#ASMEND 2700; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 2701; GFX900-NEXT: ;;#ASMSTART 2702; GFX900-NEXT: ; use s8 2703; GFX900-NEXT: ;;#ASMEND 2704; GFX900-NEXT: s_setpc_b64 s[30:31] 2705; 2706; GFX90A-LABEL: s_shuffle_v2f16_v3f16__5_5: 2707; GFX90A: ; %bb.0: 2708; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2709; GFX90A-NEXT: ;;#ASMSTART 2710; GFX90A-NEXT: ; def s[4:5] 2711; GFX90A-NEXT: ;;#ASMEND 2712; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 2713; GFX90A-NEXT: ;;#ASMSTART 2714; GFX90A-NEXT: ; use s8 2715; GFX90A-NEXT: ;;#ASMEND 2716; GFX90A-NEXT: s_setpc_b64 s[30:31] 2717; 2718; GFX940-LABEL: s_shuffle_v2f16_v3f16__5_5: 2719; GFX940: ; %bb.0: 2720; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2721; GFX940-NEXT: ;;#ASMSTART 2722; GFX940-NEXT: ; def s[0:1] 2723; GFX940-NEXT: ;;#ASMEND 2724; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 2725; GFX940-NEXT: ;;#ASMSTART 2726; GFX940-NEXT: ; use s8 2727; GFX940-NEXT: ;;#ASMEND 2728; GFX940-NEXT: s_setpc_b64 s[30:31] 2729 %vec0 = call <4 x half> asm "; def $0", "=s"() 2730 %vec1 = call <4 x half> asm "; def $0", "=s"() 2731 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2732 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2733 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 5, i32 5> 2734 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2735 ret void 2736} 2737 2738define void @s_shuffle_v2f16_v3f16__u_0() { 2739; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_0: 2740; GFX900: ; %bb.0: 2741; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2742; GFX900-NEXT: ;;#ASMSTART 2743; GFX900-NEXT: ; def s[4:5] 2744; GFX900-NEXT: ;;#ASMEND 2745; GFX900-NEXT: s_lshl_b32 s8, s4, 16 2746; GFX900-NEXT: ;;#ASMSTART 2747; GFX900-NEXT: ; use s8 2748; GFX900-NEXT: ;;#ASMEND 2749; GFX900-NEXT: s_setpc_b64 s[30:31] 2750; 2751; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_0: 2752; GFX90A: ; %bb.0: 2753; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2754; GFX90A-NEXT: ;;#ASMSTART 2755; GFX90A-NEXT: ; def s[4:5] 2756; GFX90A-NEXT: ;;#ASMEND 2757; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 2758; GFX90A-NEXT: ;;#ASMSTART 2759; GFX90A-NEXT: ; use s8 2760; GFX90A-NEXT: ;;#ASMEND 2761; GFX90A-NEXT: s_setpc_b64 s[30:31] 2762; 2763; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_0: 2764; GFX940: ; %bb.0: 2765; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2766; GFX940-NEXT: ;;#ASMSTART 2767; GFX940-NEXT: ; def s[0:1] 2768; GFX940-NEXT: ;;#ASMEND 2769; GFX940-NEXT: s_lshl_b32 s8, s0, 16 2770; GFX940-NEXT: ;;#ASMSTART 2771; GFX940-NEXT: ; use s8 2772; GFX940-NEXT: ;;#ASMEND 2773; GFX940-NEXT: s_setpc_b64 s[30:31] 2774 %vec0 = call <4 x half> asm "; def $0", "=s"() 2775 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2776 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 0> 2777 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2778 ret void 2779} 2780 2781define void @s_shuffle_v2f16_v3f16__0_0() { 2782; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_0: 2783; GFX900: ; %bb.0: 2784; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2785; GFX900-NEXT: ;;#ASMSTART 2786; GFX900-NEXT: ; def s[4:5] 2787; GFX900-NEXT: ;;#ASMEND 2788; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 2789; GFX900-NEXT: ;;#ASMSTART 2790; GFX900-NEXT: ; use s8 2791; GFX900-NEXT: ;;#ASMEND 2792; GFX900-NEXT: s_setpc_b64 s[30:31] 2793; 2794; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_0: 2795; GFX90A: ; %bb.0: 2796; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2797; GFX90A-NEXT: 
;;#ASMSTART 2798; GFX90A-NEXT: ; def s[4:5] 2799; GFX90A-NEXT: ;;#ASMEND 2800; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 2801; GFX90A-NEXT: ;;#ASMSTART 2802; GFX90A-NEXT: ; use s8 2803; GFX90A-NEXT: ;;#ASMEND 2804; GFX90A-NEXT: s_setpc_b64 s[30:31] 2805; 2806; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_0: 2807; GFX940: ; %bb.0: 2808; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2809; GFX940-NEXT: ;;#ASMSTART 2810; GFX940-NEXT: ; def s[0:1] 2811; GFX940-NEXT: ;;#ASMEND 2812; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 2813; GFX940-NEXT: ;;#ASMSTART 2814; GFX940-NEXT: ; use s8 2815; GFX940-NEXT: ;;#ASMEND 2816; GFX940-NEXT: s_setpc_b64 s[30:31] 2817 %vec0 = call <4 x half> asm "; def $0", "=s"() 2818 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2819 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> zeroinitializer 2820 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2821 ret void 2822} 2823 2824define void @s_shuffle_v2f16_v3f16__1_0() { 2825; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_0: 2826; GFX900: ; %bb.0: 2827; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2828; GFX900-NEXT: ;;#ASMSTART 2829; GFX900-NEXT: ; def s[4:5] 2830; GFX900-NEXT: ;;#ASMEND 2831; GFX900-NEXT: s_lshr_b32 s5, s4, 16 2832; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2833; GFX900-NEXT: ;;#ASMSTART 2834; GFX900-NEXT: ; use s8 2835; GFX900-NEXT: ;;#ASMEND 2836; GFX900-NEXT: s_setpc_b64 s[30:31] 2837; 2838; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_0: 2839; GFX90A: ; %bb.0: 2840; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2841; GFX90A-NEXT: ;;#ASMSTART 2842; GFX90A-NEXT: ; def s[4:5] 2843; GFX90A-NEXT: ;;#ASMEND 2844; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 2845; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2846; GFX90A-NEXT: ;;#ASMSTART 2847; GFX90A-NEXT: ; use s8 2848; GFX90A-NEXT: ;;#ASMEND 2849; GFX90A-NEXT: s_setpc_b64 s[30:31] 2850; 2851; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_0: 2852; GFX940: ; %bb.0: 
2853; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2854; GFX940-NEXT: ;;#ASMSTART 2855; GFX940-NEXT: ; def s[0:1] 2856; GFX940-NEXT: ;;#ASMEND 2857; GFX940-NEXT: s_lshr_b32 s1, s0, 16 2858; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 2859; GFX940-NEXT: ;;#ASMSTART 2860; GFX940-NEXT: ; use s8 2861; GFX940-NEXT: ;;#ASMEND 2862; GFX940-NEXT: s_setpc_b64 s[30:31] 2863 %vec0 = call <4 x half> asm "; def $0", "=s"() 2864 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2865 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 0> 2866 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2867 ret void 2868} 2869 2870define void @s_shuffle_v2f16_v3f16__2_0() { 2871; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_0: 2872; GFX900: ; %bb.0: 2873; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2874; GFX900-NEXT: ;;#ASMSTART 2875; GFX900-NEXT: ; def s[4:5] 2876; GFX900-NEXT: ;;#ASMEND 2877; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2878; GFX900-NEXT: ;;#ASMSTART 2879; GFX900-NEXT: ; use s8 2880; GFX900-NEXT: ;;#ASMEND 2881; GFX900-NEXT: s_setpc_b64 s[30:31] 2882; 2883; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_0: 2884; GFX90A: ; %bb.0: 2885; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2886; GFX90A-NEXT: ;;#ASMSTART 2887; GFX90A-NEXT: ; def s[4:5] 2888; GFX90A-NEXT: ;;#ASMEND 2889; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2890; GFX90A-NEXT: ;;#ASMSTART 2891; GFX90A-NEXT: ; use s8 2892; GFX90A-NEXT: ;;#ASMEND 2893; GFX90A-NEXT: s_setpc_b64 s[30:31] 2894; 2895; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_0: 2896; GFX940: ; %bb.0: 2897; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2898; GFX940-NEXT: ;;#ASMSTART 2899; GFX940-NEXT: ; def s[0:1] 2900; GFX940-NEXT: ;;#ASMEND 2901; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 2902; GFX940-NEXT: ;;#ASMSTART 2903; GFX940-NEXT: ; use s8 2904; GFX940-NEXT: ;;#ASMEND 2905; GFX940-NEXT: s_setpc_b64 s[30:31] 2906 %vec0 = call <4 x half> asm "; def 
$0", "=s"() 2907 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2908 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 0> 2909 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2910 ret void 2911} 2912 2913define void @s_shuffle_v2f16_v3f16__3_0() { 2914; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_0: 2915; GFX900: ; %bb.0: 2916; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2917; GFX900-NEXT: ;;#ASMSTART 2918; GFX900-NEXT: ; def s[4:5] 2919; GFX900-NEXT: ;;#ASMEND 2920; GFX900-NEXT: s_lshl_b32 s8, s4, 16 2921; GFX900-NEXT: ;;#ASMSTART 2922; GFX900-NEXT: ; use s8 2923; GFX900-NEXT: ;;#ASMEND 2924; GFX900-NEXT: s_setpc_b64 s[30:31] 2925; 2926; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_0: 2927; GFX90A: ; %bb.0: 2928; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2929; GFX90A-NEXT: ;;#ASMSTART 2930; GFX90A-NEXT: ; def s[4:5] 2931; GFX90A-NEXT: ;;#ASMEND 2932; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 2933; GFX90A-NEXT: ;;#ASMSTART 2934; GFX90A-NEXT: ; use s8 2935; GFX90A-NEXT: ;;#ASMEND 2936; GFX90A-NEXT: s_setpc_b64 s[30:31] 2937; 2938; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_0: 2939; GFX940: ; %bb.0: 2940; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2941; GFX940-NEXT: ;;#ASMSTART 2942; GFX940-NEXT: ; def s[0:1] 2943; GFX940-NEXT: ;;#ASMEND 2944; GFX940-NEXT: s_lshl_b32 s8, s0, 16 2945; GFX940-NEXT: ;;#ASMSTART 2946; GFX940-NEXT: ; use s8 2947; GFX940-NEXT: ;;#ASMEND 2948; GFX940-NEXT: s_setpc_b64 s[30:31] 2949 %vec0 = call <4 x half> asm "; def $0", "=s"() 2950 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 2951 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 0> 2952 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 2953 ret void 2954} 2955 2956define void @s_shuffle_v2f16_v3f16__4_0() { 2957; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_0: 2958; GFX900: ; %bb.0: 2959; 
GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2960; GFX900-NEXT: ;;#ASMSTART 2961; GFX900-NEXT: ; def s[4:5] 2962; GFX900-NEXT: ;;#ASMEND 2963; GFX900-NEXT: ;;#ASMSTART 2964; GFX900-NEXT: ; def s[6:7] 2965; GFX900-NEXT: ;;#ASMEND 2966; GFX900-NEXT: s_lshr_b32 s5, s6, 16 2967; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2968; GFX900-NEXT: ;;#ASMSTART 2969; GFX900-NEXT: ; use s8 2970; GFX900-NEXT: ;;#ASMEND 2971; GFX900-NEXT: s_setpc_b64 s[30:31] 2972; 2973; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_0: 2974; GFX90A: ; %bb.0: 2975; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2976; GFX90A-NEXT: ;;#ASMSTART 2977; GFX90A-NEXT: ; def s[4:5] 2978; GFX90A-NEXT: ;;#ASMEND 2979; GFX90A-NEXT: ;;#ASMSTART 2980; GFX90A-NEXT: ; def s[6:7] 2981; GFX90A-NEXT: ;;#ASMEND 2982; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 2983; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 2984; GFX90A-NEXT: ;;#ASMSTART 2985; GFX90A-NEXT: ; use s8 2986; GFX90A-NEXT: ;;#ASMEND 2987; GFX90A-NEXT: s_setpc_b64 s[30:31] 2988; 2989; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_0: 2990; GFX940: ; %bb.0: 2991; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2992; GFX940-NEXT: ;;#ASMSTART 2993; GFX940-NEXT: ; def s[0:1] 2994; GFX940-NEXT: ;;#ASMEND 2995; GFX940-NEXT: ;;#ASMSTART 2996; GFX940-NEXT: ; def s[2:3] 2997; GFX940-NEXT: ;;#ASMEND 2998; GFX940-NEXT: s_lshr_b32 s1, s2, 16 2999; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 3000; GFX940-NEXT: ;;#ASMSTART 3001; GFX940-NEXT: ; use s8 3002; GFX940-NEXT: ;;#ASMEND 3003; GFX940-NEXT: s_setpc_b64 s[30:31] 3004 %vec0 = call <4 x half> asm "; def $0", "=s"() 3005 %vec1 = call <4 x half> asm "; def $0", "=s"() 3006 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3007 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3008 %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 0> 3009 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 
3010 ret void 3011} 3012 3013define void @s_shuffle_v2f16_v3f16__u_1() { 3014; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_1: 3015; GFX900: ; %bb.0: 3016; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3017; GFX900-NEXT: ;;#ASMSTART 3018; GFX900-NEXT: ; def s[8:9] 3019; GFX900-NEXT: ;;#ASMEND 3020; GFX900-NEXT: ;;#ASMSTART 3021; GFX900-NEXT: ; use s8 3022; GFX900-NEXT: ;;#ASMEND 3023; GFX900-NEXT: s_setpc_b64 s[30:31] 3024; 3025; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_1: 3026; GFX90A: ; %bb.0: 3027; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3028; GFX90A-NEXT: ;;#ASMSTART 3029; GFX90A-NEXT: ; def s[8:9] 3030; GFX90A-NEXT: ;;#ASMEND 3031; GFX90A-NEXT: ;;#ASMSTART 3032; GFX90A-NEXT: ; use s8 3033; GFX90A-NEXT: ;;#ASMEND 3034; GFX90A-NEXT: s_setpc_b64 s[30:31] 3035; 3036; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_1: 3037; GFX940: ; %bb.0: 3038; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3039; GFX940-NEXT: ;;#ASMSTART 3040; GFX940-NEXT: ; def s[8:9] 3041; GFX940-NEXT: ;;#ASMEND 3042; GFX940-NEXT: s_nop 0 3043; GFX940-NEXT: ;;#ASMSTART 3044; GFX940-NEXT: ; use s8 3045; GFX940-NEXT: ;;#ASMEND 3046; GFX940-NEXT: s_setpc_b64 s[30:31] 3047 %vec0 = call <4 x half> asm "; def $0", "=s"() 3048 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3049 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 1> 3050 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3051 ret void 3052} 3053 3054define void @s_shuffle_v2f16_v3f16__0_1() { 3055; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_1: 3056; GFX900: ; %bb.0: 3057; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3058; GFX900-NEXT: ;;#ASMSTART 3059; GFX900-NEXT: ; def s[8:9] 3060; GFX900-NEXT: ;;#ASMEND 3061; GFX900-NEXT: ;;#ASMSTART 3062; GFX900-NEXT: ; use s8 3063; GFX900-NEXT: ;;#ASMEND 3064; GFX900-NEXT: s_setpc_b64 s[30:31] 3065; 3066; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_1: 3067; GFX90A: ; %bb.0: 3068; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3069; GFX90A-NEXT: ;;#ASMSTART 3070; GFX90A-NEXT: ; def s[8:9] 3071; GFX90A-NEXT: ;;#ASMEND 3072; GFX90A-NEXT: ;;#ASMSTART 3073; GFX90A-NEXT: ; use s8 3074; GFX90A-NEXT: ;;#ASMEND 3075; GFX90A-NEXT: s_setpc_b64 s[30:31] 3076; 3077; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_1: 3078; GFX940: ; %bb.0: 3079; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3080; GFX940-NEXT: ;;#ASMSTART 3081; GFX940-NEXT: ; def s[8:9] 3082; GFX940-NEXT: ;;#ASMEND 3083; GFX940-NEXT: s_nop 0 3084; GFX940-NEXT: ;;#ASMSTART 3085; GFX940-NEXT: ; use s8 3086; GFX940-NEXT: ;;#ASMEND 3087; GFX940-NEXT: s_setpc_b64 s[30:31] 3088 %vec0 = call <4 x half> asm "; def $0", "=s"() 3089 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3090 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 1> 3091 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3092 ret void 3093} 3094 3095define void @s_shuffle_v2f16_v3f16__1_1() { 3096; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_1: 3097; GFX900: ; %bb.0: 3098; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3099; GFX900-NEXT: ;;#ASMSTART 3100; GFX900-NEXT: ; def s[4:5] 3101; GFX900-NEXT: ;;#ASMEND 3102; GFX900-NEXT: s_lshr_b32 s4, s4, 16 3103; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 3104; GFX900-NEXT: ;;#ASMSTART 3105; GFX900-NEXT: ; use s8 3106; GFX900-NEXT: ;;#ASMEND 3107; GFX900-NEXT: s_setpc_b64 s[30:31] 3108; 3109; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_1: 3110; GFX90A: ; %bb.0: 3111; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3112; GFX90A-NEXT: ;;#ASMSTART 3113; GFX90A-NEXT: ; def s[4:5] 3114; GFX90A-NEXT: ;;#ASMEND 3115; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 3116; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 3117; GFX90A-NEXT: ;;#ASMSTART 3118; GFX90A-NEXT: ; use s8 3119; GFX90A-NEXT: ;;#ASMEND 3120; GFX90A-NEXT: s_setpc_b64 s[30:31] 3121; 3122; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_1: 3123; GFX940: ; %bb.0: 3124; 
GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3125; GFX940-NEXT: ;;#ASMSTART 3126; GFX940-NEXT: ; def s[0:1] 3127; GFX940-NEXT: ;;#ASMEND 3128; GFX940-NEXT: s_lshr_b32 s0, s0, 16 3129; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 3130; GFX940-NEXT: ;;#ASMSTART 3131; GFX940-NEXT: ; use s8 3132; GFX940-NEXT: ;;#ASMEND 3133; GFX940-NEXT: s_setpc_b64 s[30:31] 3134 %vec0 = call <4 x half> asm "; def $0", "=s"() 3135 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3136 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 1> 3137 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3138 ret void 3139} 3140 3141define void @s_shuffle_v2f16_v3f16__2_1() { 3142; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_1: 3143; GFX900: ; %bb.0: 3144; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3145; GFX900-NEXT: ;;#ASMSTART 3146; GFX900-NEXT: ; def s[4:5] 3147; GFX900-NEXT: ;;#ASMEND 3148; GFX900-NEXT: s_lshr_b32 s4, s4, 16 3149; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 3150; GFX900-NEXT: ;;#ASMSTART 3151; GFX900-NEXT: ; use s8 3152; GFX900-NEXT: ;;#ASMEND 3153; GFX900-NEXT: s_setpc_b64 s[30:31] 3154; 3155; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_1: 3156; GFX90A: ; %bb.0: 3157; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3158; GFX90A-NEXT: ;;#ASMSTART 3159; GFX90A-NEXT: ; def s[4:5] 3160; GFX90A-NEXT: ;;#ASMEND 3161; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 3162; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 3163; GFX90A-NEXT: ;;#ASMSTART 3164; GFX90A-NEXT: ; use s8 3165; GFX90A-NEXT: ;;#ASMEND 3166; GFX90A-NEXT: s_setpc_b64 s[30:31] 3167; 3168; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_1: 3169; GFX940: ; %bb.0: 3170; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3171; GFX940-NEXT: ;;#ASMSTART 3172; GFX940-NEXT: ; def s[0:1] 3173; GFX940-NEXT: ;;#ASMEND 3174; GFX940-NEXT: s_lshr_b32 s0, s0, 16 3175; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 3176; GFX940-NEXT: ;;#ASMSTART 3177; GFX940-NEXT: ; 
use s8 3178; GFX940-NEXT: ;;#ASMEND 3179; GFX940-NEXT: s_setpc_b64 s[30:31] 3180 %vec0 = call <4 x half> asm "; def $0", "=s"() 3181 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3182 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 1> 3183 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3184 ret void 3185} 3186 3187define void @s_shuffle_v2f16_v3f16__3_1() { 3188; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_1: 3189; GFX900: ; %bb.0: 3190; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3191; GFX900-NEXT: ;;#ASMSTART 3192; GFX900-NEXT: ; def s[8:9] 3193; GFX900-NEXT: ;;#ASMEND 3194; GFX900-NEXT: ;;#ASMSTART 3195; GFX900-NEXT: ; use s8 3196; GFX900-NEXT: ;;#ASMEND 3197; GFX900-NEXT: s_setpc_b64 s[30:31] 3198; 3199; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_1: 3200; GFX90A: ; %bb.0: 3201; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3202; GFX90A-NEXT: ;;#ASMSTART 3203; GFX90A-NEXT: ; def s[8:9] 3204; GFX90A-NEXT: ;;#ASMEND 3205; GFX90A-NEXT: ;;#ASMSTART 3206; GFX90A-NEXT: ; use s8 3207; GFX90A-NEXT: ;;#ASMEND 3208; GFX90A-NEXT: s_setpc_b64 s[30:31] 3209; 3210; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_1: 3211; GFX940: ; %bb.0: 3212; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3213; GFX940-NEXT: ;;#ASMSTART 3214; GFX940-NEXT: ; def s[8:9] 3215; GFX940-NEXT: ;;#ASMEND 3216; GFX940-NEXT: s_nop 0 3217; GFX940-NEXT: ;;#ASMSTART 3218; GFX940-NEXT: ; use s8 3219; GFX940-NEXT: ;;#ASMEND 3220; GFX940-NEXT: s_setpc_b64 s[30:31] 3221 %vec0 = call <4 x half> asm "; def $0", "=s"() 3222 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3223 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 1> 3224 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3225 ret void 3226} 3227 3228define void @s_shuffle_v2f16_v3f16__4_1() { 3229; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_1: 3230; GFX900: ; 
%bb.0: 3231; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3232; GFX900-NEXT: ;;#ASMSTART 3233; GFX900-NEXT: ; def s[4:5] 3234; GFX900-NEXT: ;;#ASMEND 3235; GFX900-NEXT: ;;#ASMSTART 3236; GFX900-NEXT: ; def s[6:7] 3237; GFX900-NEXT: ;;#ASMEND 3238; GFX900-NEXT: s_lshr_b32 s4, s4, 16 3239; GFX900-NEXT: s_lshr_b32 s5, s6, 16 3240; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 3241; GFX900-NEXT: ;;#ASMSTART 3242; GFX900-NEXT: ; use s8 3243; GFX900-NEXT: ;;#ASMEND 3244; GFX900-NEXT: s_setpc_b64 s[30:31] 3245; 3246; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_1: 3247; GFX90A: ; %bb.0: 3248; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3249; GFX90A-NEXT: ;;#ASMSTART 3250; GFX90A-NEXT: ; def s[4:5] 3251; GFX90A-NEXT: ;;#ASMEND 3252; GFX90A-NEXT: ;;#ASMSTART 3253; GFX90A-NEXT: ; def s[6:7] 3254; GFX90A-NEXT: ;;#ASMEND 3255; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 3256; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 3257; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 3258; GFX90A-NEXT: ;;#ASMSTART 3259; GFX90A-NEXT: ; use s8 3260; GFX90A-NEXT: ;;#ASMEND 3261; GFX90A-NEXT: s_setpc_b64 s[30:31] 3262; 3263; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_1: 3264; GFX940: ; %bb.0: 3265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3266; GFX940-NEXT: ;;#ASMSTART 3267; GFX940-NEXT: ; def s[0:1] 3268; GFX940-NEXT: ;;#ASMEND 3269; GFX940-NEXT: ;;#ASMSTART 3270; GFX940-NEXT: ; def s[2:3] 3271; GFX940-NEXT: ;;#ASMEND 3272; GFX940-NEXT: s_lshr_b32 s0, s0, 16 3273; GFX940-NEXT: s_lshr_b32 s1, s2, 16 3274; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 3275; GFX940-NEXT: ;;#ASMSTART 3276; GFX940-NEXT: ; use s8 3277; GFX940-NEXT: ;;#ASMEND 3278; GFX940-NEXT: s_setpc_b64 s[30:31] 3279 %vec0 = call <4 x half> asm "; def $0", "=s"() 3280 %vec1 = call <4 x half> asm "; def $0", "=s"() 3281 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3282 %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3283 %shuf = shufflevector <3 
x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 1> 3284 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3285 ret void 3286} 3287 3288define void @s_shuffle_v2f16_v3f16__u_2() { 3289; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_2: 3290; GFX900: ; %bb.0: 3291; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3292; GFX900-NEXT: ;;#ASMSTART 3293; GFX900-NEXT: ; def s[4:5] 3294; GFX900-NEXT: ;;#ASMEND 3295; GFX900-NEXT: s_lshl_b32 s8, s5, 16 3296; GFX900-NEXT: ;;#ASMSTART 3297; GFX900-NEXT: ; use s8 3298; GFX900-NEXT: ;;#ASMEND 3299; GFX900-NEXT: s_setpc_b64 s[30:31] 3300; 3301; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_2: 3302; GFX90A: ; %bb.0: 3303; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3304; GFX90A-NEXT: ;;#ASMSTART 3305; GFX90A-NEXT: ; def s[4:5] 3306; GFX90A-NEXT: ;;#ASMEND 3307; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 3308; GFX90A-NEXT: ;;#ASMSTART 3309; GFX90A-NEXT: ; use s8 3310; GFX90A-NEXT: ;;#ASMEND 3311; GFX90A-NEXT: s_setpc_b64 s[30:31] 3312; 3313; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_2: 3314; GFX940: ; %bb.0: 3315; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3316; GFX940-NEXT: ;;#ASMSTART 3317; GFX940-NEXT: ; def s[0:1] 3318; GFX940-NEXT: ;;#ASMEND 3319; GFX940-NEXT: s_lshl_b32 s8, s1, 16 3320; GFX940-NEXT: ;;#ASMSTART 3321; GFX940-NEXT: ; use s8 3322; GFX940-NEXT: ;;#ASMEND 3323; GFX940-NEXT: s_setpc_b64 s[30:31] 3324 %vec0 = call <4 x half> asm "; def $0", "=s"() 3325 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3326 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 2> 3327 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3328 ret void 3329} 3330 3331define void @s_shuffle_v2f16_v3f16__0_2() { 3332; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_2: 3333; GFX900: ; %bb.0: 3334; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3335; GFX900-NEXT: ;;#ASMSTART 3336; GFX900-NEXT: ; def s[4:5] 3337; GFX900-NEXT: 
;;#ASMEND 3338; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 3339; GFX900-NEXT: ;;#ASMSTART 3340; GFX900-NEXT: ; use s8 3341; GFX900-NEXT: ;;#ASMEND 3342; GFX900-NEXT: s_setpc_b64 s[30:31] 3343; 3344; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_2: 3345; GFX90A: ; %bb.0: 3346; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3347; GFX90A-NEXT: ;;#ASMSTART 3348; GFX90A-NEXT: ; def s[4:5] 3349; GFX90A-NEXT: ;;#ASMEND 3350; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 3351; GFX90A-NEXT: ;;#ASMSTART 3352; GFX90A-NEXT: ; use s8 3353; GFX90A-NEXT: ;;#ASMEND 3354; GFX90A-NEXT: s_setpc_b64 s[30:31] 3355; 3356; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_2: 3357; GFX940: ; %bb.0: 3358; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3359; GFX940-NEXT: ;;#ASMSTART 3360; GFX940-NEXT: ; def s[0:1] 3361; GFX940-NEXT: ;;#ASMEND 3362; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 3363; GFX940-NEXT: ;;#ASMSTART 3364; GFX940-NEXT: ; use s8 3365; GFX940-NEXT: ;;#ASMEND 3366; GFX940-NEXT: s_setpc_b64 s[30:31] 3367 %vec0 = call <4 x half> asm "; def $0", "=s"() 3368 %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2> 3369 %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 2> 3370 call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf) 3371 ret void 3372} 3373 3374define void @s_shuffle_v2f16_v3f16__1_2() { 3375; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_2: 3376; GFX900: ; %bb.0: 3377; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3378; GFX900-NEXT: ;;#ASMSTART 3379; GFX900-NEXT: ; def s[4:5] 3380; GFX900-NEXT: ;;#ASMEND 3381; GFX900-NEXT: s_lshr_b32 s4, s4, 16 3382; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 3383; GFX900-NEXT: ;;#ASMSTART 3384; GFX900-NEXT: ; use s8 3385; GFX900-NEXT: ;;#ASMEND 3386; GFX900-NEXT: s_setpc_b64 s[30:31] 3387; 3388; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_2: 3389; GFX90A: ; %bb.0: 3390; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3391; GFX90A-NEXT: ;;#ASMSTART 3392; 
GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 2>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Common shape of the tests below: an inline-asm "def" produces a <4 x half>
; under the "=s" constraint, a first shufflevector keeps elements 0..2 to form
; the <3 x half> input, the shuffle under test builds a <2 x half>, and an
; inline-asm "use" consumes the result pinned to s8.
; Mask indices 0..2 select from the first shuffle operand, 3..5 from the second.
;
; Mask <2, 2>: both result lanes take element 2 of %extract3 (the second
; operand is poison).
define void @s_shuffle_v2f16_v3f16__2_2() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 2>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <3, 2>: lane 0 indexes element 0 of the poison second operand; lane 1
; takes element 2 of %extract3.
define void @s_shuffle_v2f16_v3f16__3_2() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshl_b32 s8, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshl_b32 s8, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshl_b32 s8, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 2>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <4, 2>: lane 0 takes element 1 of %extract31; lane 1 takes element 2
; of %extract3.
define void @s_shuffle_v2f16_v3f16__4_2() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_2:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s6, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_2:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_2:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s2, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 2>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <poison, 3>: lane 0 is unspecified and lane 1 indexes the poison second
; operand, so no lane reads %extract3. The checks contain only the "use".
define void @s_shuffle_v2f16_v3f16__u_3() {
; GFX9-LABEL: s_shuffle_v2f16_v3f16__u_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s8
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 poison, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <0, 3>: lane 0 takes element 0 of %extract3; lane 1 indexes the poison
; second operand.
define void @s_shuffle_v2f16_v3f16__0_3() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 0, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <1, 3>: lane 0 takes element 1 of %extract3; lane 1 indexes the poison
; second operand.
define void @s_shuffle_v2f16_v3f16__1_3() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s8, s4, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s8, s4, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s8, s0, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 1, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <2, 3>: lane 0 takes element 2 of %extract3; lane 1 indexes the poison
; second operand.
define void @s_shuffle_v2f16_v3f16__2_3() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 2, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <3, 3>: both lanes index the poison second operand, so no lane reads
; %extract3. The checks contain only the "use".
define void @s_shuffle_v2f16_v3f16__3_3() {
; GFX9-LABEL: s_shuffle_v2f16_v3f16__3_3:
; GFX9: ; %bb.0:
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: ;;#ASMSTART
; GFX9-NEXT: ; use s8
; GFX9-NEXT: ;;#ASMEND
; GFX9-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> poison, <2 x i32> <i32 3, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <4, 3>: lane 0 takes element 1 and lane 1 takes element 0 of
; %extract31; %extract3 is not read. The checks contain a single def.
define void @s_shuffle_v2f16_v3f16__4_3() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_3:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s5, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_3:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s5, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_3:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s1, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 3>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <poison, 4>: lane 0 is unspecified; lane 1 takes element 1 of
; %extract31. The checks contain a single def and no shuffle instruction.
define void @s_shuffle_v2f16_v3f16__u_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <0, 4>: lane 0 takes element 0 of %extract3; lane 1 takes element 1 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__0_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s5, s6, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s1, s2, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <1, 4>: lane 0 takes element 1 of %extract3; lane 1 takes element 1 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__1_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s5, s6, 16
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s5, s6, 16
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s1, s2, 16
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <2, 4>: lane 0 takes element 2 of %extract3; lane 1 takes element 1 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__2_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s6, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s6, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s2, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <3, 4>: the lanes take elements 0 and 1 of %extract31; %extract3 is not
; read. The checks contain a single def and no shuffle instruction.
define void @s_shuffle_v2f16_v3f16__3_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[8:9]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[8:9]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[8:9]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_nop 0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <4, 4>: both lanes take element 1 of %extract31; %extract3 is not read.
; The checks contain a single def.
define void @s_shuffle_v2f16_v3f16__4_4() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_4:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_4:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_4:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 4>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <poison, 5>: lane 0 is unspecified; lane 1 takes element 2 of
; %extract31. The checks contain a single def.
define void @s_shuffle_v2f16_v3f16__u_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__u_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshl_b32 s8, s5, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__u_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshl_b32 s8, s5, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__u_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshl_b32 s8, s1, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 poison, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <0, 5>: lane 0 takes element 0 of %extract3; lane 1 takes element 2 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__0_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__0_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__0_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__0_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 0, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <1, 5>: lane 0 takes element 1 of %extract3; lane 1 takes element 2 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__1_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__1_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__1_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__1_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 1, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <2, 5>: lane 0 takes element 2 of %extract3; lane 1 takes element 2 of
; %extract31.
define void @s_shuffle_v2f16_v3f16__2_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__2_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__2_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__2_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 2, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <3, 5>: the lanes take elements 0 and 2 of %extract31; %extract3 is not
; read. The checks contain a single def.
define void @s_shuffle_v2f16_v3f16__3_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__3_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__3_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__3_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 3, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}

; Mask <4, 5>: the lanes take elements 1 and 2 of %extract31; %extract3 is not
; read. The checks contain a single def.
define void @s_shuffle_v2f16_v3f16__4_5() {
; GFX900-LABEL: s_shuffle_v2f16_v3f16__4_5:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2f16_v3f16__4_5:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2f16_v3f16__4_5:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x half> asm "; def $0", "=s"()
  %vec1 = call <4 x half> asm "; def $0", "=s"()
  %extract3 = shufflevector <4 x half> %vec0, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %extract31 = shufflevector <4 x half> %vec1, <4 x half> poison, <3 x i32> <i32 0, i32 1, i32 2>
  %shuf = shufflevector <3 x half> %extract3, <3 x half> %extract31, <2 x i32> <i32 4, i32 5>
  call void asm sideeffect "; use $0", "{s8}"(<2 x half> %shuf)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}