1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 2; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900 %s 3; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX90A %s 4; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GFX9,GFX90APLUS,GFX940 %s 5 6 7define void @v_shuffle_v2bf16_v4bf16__u_u(ptr addrspace(1) inreg %ptr) { 8; GFX9-LABEL: v_shuffle_v2bf16_v4bf16__u_u: 9; GFX9: ; %bb.0: 10; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 11; GFX9-NEXT: s_setpc_b64 s[30:31] 12 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 13 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> poison 14 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 15 ret void 16} 17 18define void @v_shuffle_v2bf16_v4bf16__0_u(ptr addrspace(1) inreg %ptr) { 19; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_u: 20; GFX900: ; %bb.0: 21; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 22; GFX900-NEXT: v_mov_b32_e32 v2, 0 23; GFX900-NEXT: ;;#ASMSTART 24; GFX900-NEXT: ; def v[0:1] 25; GFX900-NEXT: ;;#ASMEND 26; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 27; GFX900-NEXT: s_waitcnt vmcnt(0) 28; GFX900-NEXT: s_setpc_b64 s[30:31] 29; 30; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_u: 31; GFX90A: ; %bb.0: 32; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 33; GFX90A-NEXT: v_mov_b32_e32 v2, 0 34; GFX90A-NEXT: ;;#ASMSTART 35; GFX90A-NEXT: ; def v[0:1] 36; GFX90A-NEXT: ;;#ASMEND 37; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 38; GFX90A-NEXT: s_waitcnt vmcnt(0) 39; GFX90A-NEXT: s_setpc_b64 s[30:31] 40; 41; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_u: 42; GFX940: ; %bb.0: 43; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 44; GFX940-NEXT: v_mov_b32_e32 v2, 0 45; GFX940-NEXT: ;;#ASMSTART 46; GFX940-NEXT: ; def v[0:1] 47; GFX940-NEXT: ;;#ASMEND 48; GFX940-NEXT: global_store_dword v2, v0, 
s[0:1] sc0 sc1 49; GFX940-NEXT: s_waitcnt vmcnt(0) 50; GFX940-NEXT: s_setpc_b64 s[30:31] 51 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 52 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 poison> 53 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 54 ret void 55} 56 57define void @v_shuffle_v2bf16_v4bf16__1_u(ptr addrspace(1) inreg %ptr) { 58; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_u: 59; GFX900: ; %bb.0: 60; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 61; GFX900-NEXT: ;;#ASMSTART 62; GFX900-NEXT: ; def v[0:1] 63; GFX900-NEXT: ;;#ASMEND 64; GFX900-NEXT: v_mov_b32_e32 v2, 0 65; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 66; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 67; GFX900-NEXT: s_waitcnt vmcnt(0) 68; GFX900-NEXT: s_setpc_b64 s[30:31] 69; 70; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_u: 71; GFX90A: ; %bb.0: 72; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 73; GFX90A-NEXT: ;;#ASMSTART 74; GFX90A-NEXT: ; def v[0:1] 75; GFX90A-NEXT: ;;#ASMEND 76; GFX90A-NEXT: v_mov_b32_e32 v2, 0 77; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 78; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 79; GFX90A-NEXT: s_waitcnt vmcnt(0) 80; GFX90A-NEXT: s_setpc_b64 s[30:31] 81; 82; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_u: 83; GFX940: ; %bb.0: 84; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 85; GFX940-NEXT: ;;#ASMSTART 86; GFX940-NEXT: ; def v[0:1] 87; GFX940-NEXT: ;;#ASMEND 88; GFX940-NEXT: v_mov_b32_e32 v2, 0 89; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 90; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 91; GFX940-NEXT: s_waitcnt vmcnt(0) 92; GFX940-NEXT: s_setpc_b64 s[30:31] 93 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 94 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 poison> 95 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 96 ret void 97} 98 99define void @v_shuffle_v2bf16_v4bf16__2_u(ptr addrspace(1) inreg %ptr) { 100; GFX900-LABEL: 
v_shuffle_v2bf16_v4bf16__2_u: 101; GFX900: ; %bb.0: 102; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 103; GFX900-NEXT: v_mov_b32_e32 v2, 0 104; GFX900-NEXT: ;;#ASMSTART 105; GFX900-NEXT: ; def v[0:1] 106; GFX900-NEXT: ;;#ASMEND 107; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 108; GFX900-NEXT: s_waitcnt vmcnt(0) 109; GFX900-NEXT: s_setpc_b64 s[30:31] 110; 111; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_u: 112; GFX90A: ; %bb.0: 113; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 114; GFX90A-NEXT: v_mov_b32_e32 v2, 0 115; GFX90A-NEXT: ;;#ASMSTART 116; GFX90A-NEXT: ; def v[0:1] 117; GFX90A-NEXT: ;;#ASMEND 118; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 119; GFX90A-NEXT: s_waitcnt vmcnt(0) 120; GFX90A-NEXT: s_setpc_b64 s[30:31] 121; 122; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_u: 123; GFX940: ; %bb.0: 124; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 125; GFX940-NEXT: v_mov_b32_e32 v2, 0 126; GFX940-NEXT: ;;#ASMSTART 127; GFX940-NEXT: ; def v[0:1] 128; GFX940-NEXT: ;;#ASMEND 129; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 130; GFX940-NEXT: s_waitcnt vmcnt(0) 131; GFX940-NEXT: s_setpc_b64 s[30:31] 132 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 133 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 poison> 134 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 135 ret void 136} 137 138define void @v_shuffle_v2bf16_v4bf16__3_u(ptr addrspace(1) inreg %ptr) { 139; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_u: 140; GFX900: ; %bb.0: 141; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 142; GFX900-NEXT: ;;#ASMSTART 143; GFX900-NEXT: ; def v[0:1] 144; GFX900-NEXT: ;;#ASMEND 145; GFX900-NEXT: v_mov_b32_e32 v2, 0 146; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 147; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 148; GFX900-NEXT: s_waitcnt vmcnt(0) 149; GFX900-NEXT: s_setpc_b64 s[30:31] 150; 151; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_u: 152; GFX90A: ; %bb.0: 153; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 154; GFX90A-NEXT: ;;#ASMSTART 155; GFX90A-NEXT: ; def v[0:1] 156; GFX90A-NEXT: ;;#ASMEND 157; GFX90A-NEXT: v_mov_b32_e32 v2, 0 158; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 159; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 160; GFX90A-NEXT: s_waitcnt vmcnt(0) 161; GFX90A-NEXT: s_setpc_b64 s[30:31] 162; 163; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_u: 164; GFX940: ; %bb.0: 165; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 166; GFX940-NEXT: ;;#ASMSTART 167; GFX940-NEXT: ; def v[0:1] 168; GFX940-NEXT: ;;#ASMEND 169; GFX940-NEXT: v_mov_b32_e32 v2, 0 170; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 171; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 172; GFX940-NEXT: s_waitcnt vmcnt(0) 173; GFX940-NEXT: s_setpc_b64 s[30:31] 174 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 175 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 poison> 176 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 177 ret void 178} 179 180define void @v_shuffle_v2bf16_v4bf16__4_u(ptr addrspace(1) inreg %ptr) { 181; GFX9-LABEL: v_shuffle_v2bf16_v4bf16__4_u: 182; GFX9: ; %bb.0: 183; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 184; GFX9-NEXT: s_setpc_b64 s[30:31] 185 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 186 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 poison> 187 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 188 ret void 189} 190 191define void @v_shuffle_v2bf16_v4bf16__5_u(ptr addrspace(1) inreg %ptr) { 192; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_u: 193; GFX900: ; %bb.0: 194; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 195; GFX900-NEXT: ;;#ASMSTART 196; GFX900-NEXT: ; def v[0:1] 197; GFX900-NEXT: ;;#ASMEND 198; GFX900-NEXT: v_mov_b32_e32 v2, 0 199; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 200; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 201; GFX900-NEXT: s_waitcnt vmcnt(0) 202; GFX900-NEXT: s_setpc_b64 
s[30:31] 203; 204; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_u: 205; GFX90A: ; %bb.0: 206; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 207; GFX90A-NEXT: ;;#ASMSTART 208; GFX90A-NEXT: ; def v[0:1] 209; GFX90A-NEXT: ;;#ASMEND 210; GFX90A-NEXT: v_mov_b32_e32 v2, 0 211; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 212; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 213; GFX90A-NEXT: s_waitcnt vmcnt(0) 214; GFX90A-NEXT: s_setpc_b64 s[30:31] 215; 216; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_u: 217; GFX940: ; %bb.0: 218; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 219; GFX940-NEXT: ;;#ASMSTART 220; GFX940-NEXT: ; def v[0:1] 221; GFX940-NEXT: ;;#ASMEND 222; GFX940-NEXT: v_mov_b32_e32 v2, 0 223; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 224; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 225; GFX940-NEXT: s_waitcnt vmcnt(0) 226; GFX940-NEXT: s_setpc_b64 s[30:31] 227 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 228 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 229 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 poison> 230 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 231 ret void 232} 233 234define void @v_shuffle_v2bf16_v4bf16__6_u(ptr addrspace(1) inreg %ptr) { 235; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_u: 236; GFX900: ; %bb.0: 237; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 238; GFX900-NEXT: v_mov_b32_e32 v2, 0 239; GFX900-NEXT: ;;#ASMSTART 240; GFX900-NEXT: ; def v[0:1] 241; GFX900-NEXT: ;;#ASMEND 242; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 243; GFX900-NEXT: s_waitcnt vmcnt(0) 244; GFX900-NEXT: s_setpc_b64 s[30:31] 245; 246; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_u: 247; GFX90A: ; %bb.0: 248; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 249; GFX90A-NEXT: v_mov_b32_e32 v2, 0 250; GFX90A-NEXT: ;;#ASMSTART 251; GFX90A-NEXT: ; def v[0:1] 252; GFX90A-NEXT: ;;#ASMEND 253; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 254; GFX90A-NEXT: s_waitcnt vmcnt(0) 255; 
GFX90A-NEXT: s_setpc_b64 s[30:31] 256; 257; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_u: 258; GFX940: ; %bb.0: 259; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 260; GFX940-NEXT: v_mov_b32_e32 v2, 0 261; GFX940-NEXT: ;;#ASMSTART 262; GFX940-NEXT: ; def v[0:1] 263; GFX940-NEXT: ;;#ASMEND 264; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 265; GFX940-NEXT: s_waitcnt vmcnt(0) 266; GFX940-NEXT: s_setpc_b64 s[30:31] 267 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 268 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 269 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 poison> 270 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 271 ret void 272} 273 274define void @v_shuffle_v2bf16_v4bf16__7_u(ptr addrspace(1) inreg %ptr) { 275; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_u: 276; GFX900: ; %bb.0: 277; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 278; GFX900-NEXT: ;;#ASMSTART 279; GFX900-NEXT: ; def v[0:1] 280; GFX900-NEXT: ;;#ASMEND 281; GFX900-NEXT: v_mov_b32_e32 v2, 0 282; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 283; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 284; GFX900-NEXT: s_waitcnt vmcnt(0) 285; GFX900-NEXT: s_setpc_b64 s[30:31] 286; 287; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_u: 288; GFX90A: ; %bb.0: 289; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 290; GFX90A-NEXT: ;;#ASMSTART 291; GFX90A-NEXT: ; def v[0:1] 292; GFX90A-NEXT: ;;#ASMEND 293; GFX90A-NEXT: v_mov_b32_e32 v2, 0 294; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 295; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 296; GFX90A-NEXT: s_waitcnt vmcnt(0) 297; GFX90A-NEXT: s_setpc_b64 s[30:31] 298; 299; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_u: 300; GFX940: ; %bb.0: 301; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 302; GFX940-NEXT: ;;#ASMSTART 303; GFX940-NEXT: ; def v[0:1] 304; GFX940-NEXT: ;;#ASMEND 305; GFX940-NEXT: v_mov_b32_e32 v2, 0 306; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 307; GFX940-NEXT: 
global_store_dword v2, v0, s[0:1] sc0 sc1 308; GFX940-NEXT: s_waitcnt vmcnt(0) 309; GFX940-NEXT: s_setpc_b64 s[30:31] 310 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 311 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 312 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 poison> 313 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 314 ret void 315} 316 317define void @v_shuffle_v2bf16_v4bf16__7_0(ptr addrspace(1) inreg %ptr) { 318; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_0: 319; GFX900: ; %bb.0: 320; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 321; GFX900-NEXT: ;;#ASMSTART 322; GFX900-NEXT: ; def v[0:1] 323; GFX900-NEXT: ;;#ASMEND 324; GFX900-NEXT: v_mov_b32_e32 v3, 0 325; GFX900-NEXT: ;;#ASMSTART 326; GFX900-NEXT: ; def v[1:2] 327; GFX900-NEXT: ;;#ASMEND 328; GFX900-NEXT: v_alignbit_b32 v0, v0, v2, 16 329; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 330; GFX900-NEXT: s_waitcnt vmcnt(0) 331; GFX900-NEXT: s_setpc_b64 s[30:31] 332; 333; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_0: 334; GFX90A: ; %bb.0: 335; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 336; GFX90A-NEXT: ;;#ASMSTART 337; GFX90A-NEXT: ; def v[0:1] 338; GFX90A-NEXT: ;;#ASMEND 339; GFX90A-NEXT: v_mov_b32_e32 v4, 0 340; GFX90A-NEXT: ;;#ASMSTART 341; GFX90A-NEXT: ; def v[2:3] 342; GFX90A-NEXT: ;;#ASMEND 343; GFX90A-NEXT: v_alignbit_b32 v0, v0, v3, 16 344; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 345; GFX90A-NEXT: s_waitcnt vmcnt(0) 346; GFX90A-NEXT: s_setpc_b64 s[30:31] 347; 348; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_0: 349; GFX940: ; %bb.0: 350; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 351; GFX940-NEXT: ;;#ASMSTART 352; GFX940-NEXT: ; def v[0:1] 353; GFX940-NEXT: ;;#ASMEND 354; GFX940-NEXT: v_mov_b32_e32 v4, 0 355; GFX940-NEXT: ;;#ASMSTART 356; GFX940-NEXT: ; def v[2:3] 357; GFX940-NEXT: ;;#ASMEND 358; GFX940-NEXT: s_nop 0 359; GFX940-NEXT: v_alignbit_b32 v0, v0, v3, 16 360; GFX940-NEXT: global_store_dword v4, v0, 
s[0:1] sc0 sc1 361; GFX940-NEXT: s_waitcnt vmcnt(0) 362; GFX940-NEXT: s_setpc_b64 s[30:31] 363 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 364 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 365 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 0> 366 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 367 ret void 368} 369 370define void @v_shuffle_v2bf16_v4bf16__7_1(ptr addrspace(1) inreg %ptr) { 371; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_1: 372; GFX900: ; %bb.0: 373; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 374; GFX900-NEXT: ;;#ASMSTART 375; GFX900-NEXT: ; def v[0:1] 376; GFX900-NEXT: ;;#ASMEND 377; GFX900-NEXT: s_mov_b32 s4, 0x7060302 378; GFX900-NEXT: v_mov_b32_e32 v3, 0 379; GFX900-NEXT: ;;#ASMSTART 380; GFX900-NEXT: ; def v[1:2] 381; GFX900-NEXT: ;;#ASMEND 382; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 383; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 384; GFX900-NEXT: s_waitcnt vmcnt(0) 385; GFX900-NEXT: s_setpc_b64 s[30:31] 386; 387; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_1: 388; GFX90A: ; %bb.0: 389; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 390; GFX90A-NEXT: ;;#ASMSTART 391; GFX90A-NEXT: ; def v[0:1] 392; GFX90A-NEXT: ;;#ASMEND 393; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 394; GFX90A-NEXT: v_mov_b32_e32 v4, 0 395; GFX90A-NEXT: ;;#ASMSTART 396; GFX90A-NEXT: ; def v[2:3] 397; GFX90A-NEXT: ;;#ASMEND 398; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 399; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 400; GFX90A-NEXT: s_waitcnt vmcnt(0) 401; GFX90A-NEXT: s_setpc_b64 s[30:31] 402; 403; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_1: 404; GFX940: ; %bb.0: 405; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 406; GFX940-NEXT: ;;#ASMSTART 407; GFX940-NEXT: ; def v[0:1] 408; GFX940-NEXT: ;;#ASMEND 409; GFX940-NEXT: s_mov_b32 s2, 0x7060302 410; GFX940-NEXT: v_mov_b32_e32 v4, 0 411; GFX940-NEXT: ;;#ASMSTART 412; GFX940-NEXT: ; def v[2:3] 413; GFX940-NEXT: ;;#ASMEND 414; GFX940-NEXT: s_nop 0 415; 
GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 416; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 417; GFX940-NEXT: s_waitcnt vmcnt(0) 418; GFX940-NEXT: s_setpc_b64 s[30:31] 419 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 420 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 421 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 1> 422 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 423 ret void 424} 425 426define void @v_shuffle_v2bf16_v4bf16__7_2(ptr addrspace(1) inreg %ptr) { 427; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_2: 428; GFX900: ; %bb.0: 429; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 430; GFX900-NEXT: ;;#ASMSTART 431; GFX900-NEXT: ; def v[0:1] 432; GFX900-NEXT: ;;#ASMEND 433; GFX900-NEXT: v_mov_b32_e32 v4, 0 434; GFX900-NEXT: ;;#ASMSTART 435; GFX900-NEXT: ; def v[2:3] 436; GFX900-NEXT: ;;#ASMEND 437; GFX900-NEXT: v_alignbit_b32 v0, v1, v3, 16 438; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 439; GFX900-NEXT: s_waitcnt vmcnt(0) 440; GFX900-NEXT: s_setpc_b64 s[30:31] 441; 442; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_2: 443; GFX90A: ; %bb.0: 444; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 445; GFX90A-NEXT: ;;#ASMSTART 446; GFX90A-NEXT: ; def v[0:1] 447; GFX90A-NEXT: ;;#ASMEND 448; GFX90A-NEXT: v_mov_b32_e32 v4, 0 449; GFX90A-NEXT: ;;#ASMSTART 450; GFX90A-NEXT: ; def v[2:3] 451; GFX90A-NEXT: ;;#ASMEND 452; GFX90A-NEXT: v_alignbit_b32 v0, v1, v3, 16 453; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 454; GFX90A-NEXT: s_waitcnt vmcnt(0) 455; GFX90A-NEXT: s_setpc_b64 s[30:31] 456; 457; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_2: 458; GFX940: ; %bb.0: 459; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 460; GFX940-NEXT: ;;#ASMSTART 461; GFX940-NEXT: ; def v[0:1] 462; GFX940-NEXT: ;;#ASMEND 463; GFX940-NEXT: v_mov_b32_e32 v4, 0 464; GFX940-NEXT: ;;#ASMSTART 465; GFX940-NEXT: ; def v[2:3] 466; GFX940-NEXT: ;;#ASMEND 467; GFX940-NEXT: s_nop 0 468; GFX940-NEXT: v_alignbit_b32 v0, v1, 
v3, 16 469; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 470; GFX940-NEXT: s_waitcnt vmcnt(0) 471; GFX940-NEXT: s_setpc_b64 s[30:31] 472 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 473 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 474 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 2> 475 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 476 ret void 477} 478 479define void @v_shuffle_v2bf16_v4bf16__7_3(ptr addrspace(1) inreg %ptr) { 480; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_3: 481; GFX900: ; %bb.0: 482; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 483; GFX900-NEXT: ;;#ASMSTART 484; GFX900-NEXT: ; def v[0:1] 485; GFX900-NEXT: ;;#ASMEND 486; GFX900-NEXT: s_mov_b32 s4, 0x7060302 487; GFX900-NEXT: v_mov_b32_e32 v4, 0 488; GFX900-NEXT: ;;#ASMSTART 489; GFX900-NEXT: ; def v[2:3] 490; GFX900-NEXT: ;;#ASMEND 491; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 492; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 493; GFX900-NEXT: s_waitcnt vmcnt(0) 494; GFX900-NEXT: s_setpc_b64 s[30:31] 495; 496; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_3: 497; GFX90A: ; %bb.0: 498; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 499; GFX90A-NEXT: ;;#ASMSTART 500; GFX90A-NEXT: ; def v[0:1] 501; GFX90A-NEXT: ;;#ASMEND 502; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 503; GFX90A-NEXT: v_mov_b32_e32 v4, 0 504; GFX90A-NEXT: ;;#ASMSTART 505; GFX90A-NEXT: ; def v[2:3] 506; GFX90A-NEXT: ;;#ASMEND 507; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 508; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 509; GFX90A-NEXT: s_waitcnt vmcnt(0) 510; GFX90A-NEXT: s_setpc_b64 s[30:31] 511; 512; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_3: 513; GFX940: ; %bb.0: 514; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 515; GFX940-NEXT: ;;#ASMSTART 516; GFX940-NEXT: ; def v[0:1] 517; GFX940-NEXT: ;;#ASMEND 518; GFX940-NEXT: s_mov_b32 s2, 0x7060302 519; GFX940-NEXT: v_mov_b32_e32 v4, 0 520; GFX940-NEXT: ;;#ASMSTART 521; GFX940-NEXT: ; def v[2:3] 522; 
GFX940-NEXT: ;;#ASMEND 523; GFX940-NEXT: s_nop 0 524; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 525; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 526; GFX940-NEXT: s_waitcnt vmcnt(0) 527; GFX940-NEXT: s_setpc_b64 s[30:31] 528 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 529 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 530 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 3> 531 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 532 ret void 533} 534 535define void @v_shuffle_v2bf16_v4bf16__7_4(ptr addrspace(1) inreg %ptr) { 536; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_4: 537; GFX900: ; %bb.0: 538; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 539; GFX900-NEXT: ;;#ASMSTART 540; GFX900-NEXT: ; def v[0:1] 541; GFX900-NEXT: ;;#ASMEND 542; GFX900-NEXT: v_mov_b32_e32 v2, 0 543; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 544; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 545; GFX900-NEXT: s_waitcnt vmcnt(0) 546; GFX900-NEXT: s_setpc_b64 s[30:31] 547; 548; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_4: 549; GFX90A: ; %bb.0: 550; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 551; GFX90A-NEXT: ;;#ASMSTART 552; GFX90A-NEXT: ; def v[0:1] 553; GFX90A-NEXT: ;;#ASMEND 554; GFX90A-NEXT: v_mov_b32_e32 v2, 0 555; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 556; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 557; GFX90A-NEXT: s_waitcnt vmcnt(0) 558; GFX90A-NEXT: s_setpc_b64 s[30:31] 559; 560; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_4: 561; GFX940: ; %bb.0: 562; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 563; GFX940-NEXT: ;;#ASMSTART 564; GFX940-NEXT: ; def v[0:1] 565; GFX940-NEXT: ;;#ASMEND 566; GFX940-NEXT: v_mov_b32_e32 v2, 0 567; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 568; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 569; GFX940-NEXT: s_waitcnt vmcnt(0) 570; GFX940-NEXT: s_setpc_b64 s[30:31] 571 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 572 %vec1 = call <4 x bfloat> asm "; def 
$0", "=v"() 573 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 4> 574 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 575 ret void 576} 577 578define void @v_shuffle_v2bf16_v4bf16__7_5(ptr addrspace(1) inreg %ptr) { 579; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_5: 580; GFX900: ; %bb.0: 581; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 582; GFX900-NEXT: ;;#ASMSTART 583; GFX900-NEXT: ; def v[0:1] 584; GFX900-NEXT: ;;#ASMEND 585; GFX900-NEXT: s_mov_b32 s4, 0x7060302 586; GFX900-NEXT: v_mov_b32_e32 v2, 0 587; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 588; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 589; GFX900-NEXT: s_waitcnt vmcnt(0) 590; GFX900-NEXT: s_setpc_b64 s[30:31] 591; 592; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_5: 593; GFX90A: ; %bb.0: 594; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 595; GFX90A-NEXT: ;;#ASMSTART 596; GFX90A-NEXT: ; def v[0:1] 597; GFX90A-NEXT: ;;#ASMEND 598; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 599; GFX90A-NEXT: v_mov_b32_e32 v2, 0 600; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 601; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 602; GFX90A-NEXT: s_waitcnt vmcnt(0) 603; GFX90A-NEXT: s_setpc_b64 s[30:31] 604; 605; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_5: 606; GFX940: ; %bb.0: 607; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 608; GFX940-NEXT: ;;#ASMSTART 609; GFX940-NEXT: ; def v[0:1] 610; GFX940-NEXT: ;;#ASMEND 611; GFX940-NEXT: s_mov_b32 s2, 0x7060302 612; GFX940-NEXT: v_mov_b32_e32 v2, 0 613; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 614; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 615; GFX940-NEXT: s_waitcnt vmcnt(0) 616; GFX940-NEXT: s_setpc_b64 s[30:31] 617 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 618 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 619 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 5> 620 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 621 ret void 622} 623 624define void 
@v_shuffle_v2bf16_v4bf16__7_6(ptr addrspace(1) inreg %ptr) { 625; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_6: 626; GFX900: ; %bb.0: 627; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 628; GFX900-NEXT: ;;#ASMSTART 629; GFX900-NEXT: ; def v[0:1] 630; GFX900-NEXT: ;;#ASMEND 631; GFX900-NEXT: v_mov_b32_e32 v2, 0 632; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 633; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 634; GFX900-NEXT: s_waitcnt vmcnt(0) 635; GFX900-NEXT: s_setpc_b64 s[30:31] 636; 637; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_6: 638; GFX90A: ; %bb.0: 639; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 640; GFX90A-NEXT: ;;#ASMSTART 641; GFX90A-NEXT: ; def v[0:1] 642; GFX90A-NEXT: ;;#ASMEND 643; GFX90A-NEXT: v_mov_b32_e32 v2, 0 644; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 645; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 646; GFX90A-NEXT: s_waitcnt vmcnt(0) 647; GFX90A-NEXT: s_setpc_b64 s[30:31] 648; 649; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_6: 650; GFX940: ; %bb.0: 651; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 652; GFX940-NEXT: ;;#ASMSTART 653; GFX940-NEXT: ; def v[0:1] 654; GFX940-NEXT: ;;#ASMEND 655; GFX940-NEXT: v_mov_b32_e32 v2, 0 656; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 657; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 658; GFX940-NEXT: s_waitcnt vmcnt(0) 659; GFX940-NEXT: s_setpc_b64 s[30:31] 660 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 661 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 662 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 6> 663 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 664 ret void 665} 666 667define void @v_shuffle_v2bf16_v4bf16__7_7(ptr addrspace(1) inreg %ptr) { 668; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__7_7: 669; GFX900: ; %bb.0: 670; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 671; GFX900-NEXT: ;;#ASMSTART 672; GFX900-NEXT: ; def v[0:1] 673; GFX900-NEXT: ;;#ASMEND 674; GFX900-NEXT: s_mov_b32 s4, 0x7060302 675; 
GFX900-NEXT: v_mov_b32_e32 v2, 0 676; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 677; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 678; GFX900-NEXT: s_waitcnt vmcnt(0) 679; GFX900-NEXT: s_setpc_b64 s[30:31] 680; 681; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__7_7: 682; GFX90A: ; %bb.0: 683; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 684; GFX90A-NEXT: ;;#ASMSTART 685; GFX90A-NEXT: ; def v[0:1] 686; GFX90A-NEXT: ;;#ASMEND 687; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 688; GFX90A-NEXT: v_mov_b32_e32 v2, 0 689; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 690; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 691; GFX90A-NEXT: s_waitcnt vmcnt(0) 692; GFX90A-NEXT: s_setpc_b64 s[30:31] 693; 694; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__7_7: 695; GFX940: ; %bb.0: 696; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 697; GFX940-NEXT: ;;#ASMSTART 698; GFX940-NEXT: ; def v[0:1] 699; GFX940-NEXT: ;;#ASMEND 700; GFX940-NEXT: s_mov_b32 s2, 0x7060302 701; GFX940-NEXT: v_mov_b32_e32 v2, 0 702; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 703; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 704; GFX940-NEXT: s_waitcnt vmcnt(0) 705; GFX940-NEXT: s_setpc_b64 s[30:31] 706 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 707 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 708 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 7> 709 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 710 ret void 711} 712 713define void @v_shuffle_v2bf16_v4bf16__u_0(ptr addrspace(1) inreg %ptr) { 714; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_0: 715; GFX900: ; %bb.0: 716; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 717; GFX900-NEXT: ;;#ASMSTART 718; GFX900-NEXT: ; def v[0:1] 719; GFX900-NEXT: ;;#ASMEND 720; GFX900-NEXT: v_mov_b32_e32 v2, 0 721; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 722; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 723; GFX900-NEXT: s_waitcnt vmcnt(0) 724; GFX900-NEXT: s_setpc_b64 s[30:31] 725; 726; GFX90A-LABEL: 
v_shuffle_v2bf16_v4bf16__u_0: 727; GFX90A: ; %bb.0: 728; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 729; GFX90A-NEXT: ;;#ASMSTART 730; GFX90A-NEXT: ; def v[0:1] 731; GFX90A-NEXT: ;;#ASMEND 732; GFX90A-NEXT: v_mov_b32_e32 v2, 0 733; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 734; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 735; GFX90A-NEXT: s_waitcnt vmcnt(0) 736; GFX90A-NEXT: s_setpc_b64 s[30:31] 737; 738; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_0: 739; GFX940: ; %bb.0: 740; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 741; GFX940-NEXT: ;;#ASMSTART 742; GFX940-NEXT: ; def v[0:1] 743; GFX940-NEXT: ;;#ASMEND 744; GFX940-NEXT: v_mov_b32_e32 v2, 0 745; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 746; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 747; GFX940-NEXT: s_waitcnt vmcnt(0) 748; GFX940-NEXT: s_setpc_b64 s[30:31] 749 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 750 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 0> 751 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 752 ret void 753} 754 755define void @v_shuffle_v2bf16_v4bf16__0_0(ptr addrspace(1) inreg %ptr) { 756; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_0: 757; GFX900: ; %bb.0: 758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 759; GFX900-NEXT: ;;#ASMSTART 760; GFX900-NEXT: ; def v[0:1] 761; GFX900-NEXT: ;;#ASMEND 762; GFX900-NEXT: s_mov_b32 s4, 0x5040100 763; GFX900-NEXT: v_mov_b32_e32 v2, 0 764; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 765; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 766; GFX900-NEXT: s_waitcnt vmcnt(0) 767; GFX900-NEXT: s_setpc_b64 s[30:31] 768; 769; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_0: 770; GFX90A: ; %bb.0: 771; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 772; GFX90A-NEXT: ;;#ASMSTART 773; GFX90A-NEXT: ; def v[0:1] 774; GFX90A-NEXT: ;;#ASMEND 775; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 776; GFX90A-NEXT: v_mov_b32_e32 v2, 0 777; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 778; 
GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 779; GFX90A-NEXT: s_waitcnt vmcnt(0) 780; GFX90A-NEXT: s_setpc_b64 s[30:31] 781; 782; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_0: 783; GFX940: ; %bb.0: 784; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 785; GFX940-NEXT: ;;#ASMSTART 786; GFX940-NEXT: ; def v[0:1] 787; GFX940-NEXT: ;;#ASMEND 788; GFX940-NEXT: s_mov_b32 s2, 0x5040100 789; GFX940-NEXT: v_mov_b32_e32 v2, 0 790; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 791; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 792; GFX940-NEXT: s_waitcnt vmcnt(0) 793; GFX940-NEXT: s_setpc_b64 s[30:31] 794 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 795 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> zeroinitializer 796 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 797 ret void 798} 799 800define void @v_shuffle_v2bf16_v4bf16__1_0(ptr addrspace(1) inreg %ptr) { 801; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_0: 802; GFX900: ; %bb.0: 803; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 804; GFX900-NEXT: ;;#ASMSTART 805; GFX900-NEXT: ; def v[0:1] 806; GFX900-NEXT: ;;#ASMEND 807; GFX900-NEXT: v_mov_b32_e32 v2, 0 808; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 809; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 810; GFX900-NEXT: s_waitcnt vmcnt(0) 811; GFX900-NEXT: s_setpc_b64 s[30:31] 812; 813; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_0: 814; GFX90A: ; %bb.0: 815; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 816; GFX90A-NEXT: ;;#ASMSTART 817; GFX90A-NEXT: ; def v[0:1] 818; GFX90A-NEXT: ;;#ASMEND 819; GFX90A-NEXT: v_mov_b32_e32 v2, 0 820; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 821; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 822; GFX90A-NEXT: s_waitcnt vmcnt(0) 823; GFX90A-NEXT: s_setpc_b64 s[30:31] 824; 825; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_0: 826; GFX940: ; %bb.0: 827; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 828; GFX940-NEXT: ;;#ASMSTART 829; GFX940-NEXT: ; def v[0:1] 830; GFX940-NEXT: 
;;#ASMEND 831; GFX940-NEXT: v_mov_b32_e32 v2, 0 832; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 833; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 834; GFX940-NEXT: s_waitcnt vmcnt(0) 835; GFX940-NEXT: s_setpc_b64 s[30:31] 836 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 837 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 0> 838 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 839 ret void 840} 841 842define void @v_shuffle_v2bf16_v4bf16__2_0(ptr addrspace(1) inreg %ptr) { 843; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_0: 844; GFX900: ; %bb.0: 845; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 846; GFX900-NEXT: ;;#ASMSTART 847; GFX900-NEXT: ; def v[0:1] 848; GFX900-NEXT: ;;#ASMEND 849; GFX900-NEXT: s_mov_b32 s4, 0x5040100 850; GFX900-NEXT: v_mov_b32_e32 v2, 0 851; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 852; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 853; GFX900-NEXT: s_waitcnt vmcnt(0) 854; GFX900-NEXT: s_setpc_b64 s[30:31] 855; 856; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_0: 857; GFX90A: ; %bb.0: 858; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 859; GFX90A-NEXT: ;;#ASMSTART 860; GFX90A-NEXT: ; def v[0:1] 861; GFX90A-NEXT: ;;#ASMEND 862; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 863; GFX90A-NEXT: v_mov_b32_e32 v2, 0 864; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 865; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 866; GFX90A-NEXT: s_waitcnt vmcnt(0) 867; GFX90A-NEXT: s_setpc_b64 s[30:31] 868; 869; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_0: 870; GFX940: ; %bb.0: 871; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 872; GFX940-NEXT: ;;#ASMSTART 873; GFX940-NEXT: ; def v[0:1] 874; GFX940-NEXT: ;;#ASMEND 875; GFX940-NEXT: s_mov_b32 s2, 0x5040100 876; GFX940-NEXT: v_mov_b32_e32 v2, 0 877; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 878; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 879; GFX940-NEXT: s_waitcnt vmcnt(0) 880; GFX940-NEXT: s_setpc_b64 s[30:31] 881 %vec0 = call <4 x bfloat> 
asm "; def $0", "=v"() 882 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 0> 883 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 884 ret void 885} 886
; Test case: shufflevector of %vec0 and poison with mask <3, 0> (elements 3 and 0 of %vec0); the <2 x bfloat> result is stored to %ptr.
887define void @v_shuffle_v2bf16_v4bf16__3_0(ptr addrspace(1) inreg %ptr) { 888; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_0: 889; GFX900: ; %bb.0: 890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 891; GFX900-NEXT: ;;#ASMSTART 892; GFX900-NEXT: ; def v[0:1] 893; GFX900-NEXT: ;;#ASMEND 894; GFX900-NEXT: v_mov_b32_e32 v2, 0 895; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 896; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 897; GFX900-NEXT: s_waitcnt vmcnt(0) 898; GFX900-NEXT: s_setpc_b64 s[30:31] 899; 900; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_0: 901; GFX90A: ; %bb.0: 902; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 903; GFX90A-NEXT: ;;#ASMSTART 904; GFX90A-NEXT: ; def v[0:1] 905; GFX90A-NEXT: ;;#ASMEND 906; GFX90A-NEXT: v_mov_b32_e32 v2, 0 907; GFX90A-NEXT: v_alignbit_b32 v0, v0, v1, 16 908; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 909; GFX90A-NEXT: s_waitcnt vmcnt(0) 910; GFX90A-NEXT: s_setpc_b64 s[30:31] 911; 912; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_0: 913; GFX940: ; %bb.0: 914; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 915; GFX940-NEXT: ;;#ASMSTART 916; GFX940-NEXT: ; def v[0:1] 917; GFX940-NEXT: ;;#ASMEND 918; GFX940-NEXT: v_mov_b32_e32 v2, 0 919; GFX940-NEXT: v_alignbit_b32 v0, v0, v1, 16 920; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 921; GFX940-NEXT: s_waitcnt vmcnt(0) 922; GFX940-NEXT: s_setpc_b64 s[30:31] 923 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 924 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 0> 925 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 926 ret void 927} 928
; Test case: shufflevector of %vec0 and poison with mask <4, 0>; index 4 addresses the poison second operand, so only result lane 1 (element 0 of %vec0) is defined.
929define void @v_shuffle_v2bf16_v4bf16__4_0(ptr addrspace(1) inreg %ptr) { 930; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_0: 931; GFX900: ; %bb.0: 932; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 933; GFX900-NEXT: ;;#ASMSTART 934; GFX900-NEXT: ; def v[0:1] 935; GFX900-NEXT: ;;#ASMEND 936; GFX900-NEXT: v_mov_b32_e32 v2, 0 937; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v0 938; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 939; GFX900-NEXT: s_waitcnt vmcnt(0) 940; GFX900-NEXT: s_setpc_b64 s[30:31] 941; 942; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_0: 943; GFX90A: ; %bb.0: 944; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 945; GFX90A-NEXT: ;;#ASMSTART 946; GFX90A-NEXT: ; def v[0:1] 947; GFX90A-NEXT: ;;#ASMEND 948; GFX90A-NEXT: v_mov_b32_e32 v2, 0 949; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v0 950; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 951; GFX90A-NEXT: s_waitcnt vmcnt(0) 952; GFX90A-NEXT: s_setpc_b64 s[30:31] 953; 954; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_0: 955; GFX940: ; %bb.0: 956; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 957; GFX940-NEXT: ;;#ASMSTART 958; GFX940-NEXT: ; def v[0:1] 959; GFX940-NEXT: ;;#ASMEND 960; GFX940-NEXT: v_mov_b32_e32 v2, 0 961; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v0 962; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 963; GFX940-NEXT: s_waitcnt vmcnt(0) 964; GFX940-NEXT: s_setpc_b64 s[30:31] 965 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 966 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 0> 967 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 968 ret void 969} 970
; Test case: shufflevector of %vec0 and %vec1 with mask <5, 0>; lane 0 is element 1 of %vec1 (index 5), lane 1 is element 0 of %vec0.
971define void @v_shuffle_v2bf16_v4bf16__5_0(ptr addrspace(1) inreg %ptr) { 972; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_0: 973; GFX900: ; %bb.0: 974; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 975; GFX900-NEXT: ;;#ASMSTART 976; GFX900-NEXT: ; def v[0:1] 977; GFX900-NEXT: ;;#ASMEND 978; GFX900-NEXT: v_mov_b32_e32 v3, 0 979; GFX900-NEXT: ;;#ASMSTART 980; GFX900-NEXT: ; def v[1:2] 981; GFX900-NEXT: ;;#ASMEND 982; GFX900-NEXT: v_alignbit_b32 v0, v0, v1, 16 983; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 984; GFX900-NEXT: s_waitcnt 
vmcnt(0) 985; GFX900-NEXT: s_setpc_b64 s[30:31] 986; 987; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_0: 988; GFX90A: ; %bb.0: 989; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 990; GFX90A-NEXT: ;;#ASMSTART 991; GFX90A-NEXT: ; def v[0:1] 992; GFX90A-NEXT: ;;#ASMEND 993; GFX90A-NEXT: v_mov_b32_e32 v4, 0 994; GFX90A-NEXT: ;;#ASMSTART 995; GFX90A-NEXT: ; def v[2:3] 996; GFX90A-NEXT: ;;#ASMEND 997; GFX90A-NEXT: v_alignbit_b32 v0, v0, v2, 16 998; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 999; GFX90A-NEXT: s_waitcnt vmcnt(0) 1000; GFX90A-NEXT: s_setpc_b64 s[30:31] 1001; 1002; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_0: 1003; GFX940: ; %bb.0: 1004; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1005; GFX940-NEXT: ;;#ASMSTART 1006; GFX940-NEXT: ; def v[0:1] 1007; GFX940-NEXT: ;;#ASMEND 1008; GFX940-NEXT: v_mov_b32_e32 v4, 0 1009; GFX940-NEXT: ;;#ASMSTART 1010; GFX940-NEXT: ; def v[2:3] 1011; GFX940-NEXT: ;;#ASMEND 1012; GFX940-NEXT: s_nop 0 1013; GFX940-NEXT: v_alignbit_b32 v0, v0, v2, 16 1014; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1015; GFX940-NEXT: s_waitcnt vmcnt(0) 1016; GFX940-NEXT: s_setpc_b64 s[30:31] 1017 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1018 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 1019 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 0> 1020 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1021 ret void 1022} 1023
; Test case: shufflevector of %vec0 and %vec1 with mask <6, 0>; lane 0 is element 2 of %vec1 (index 6), lane 1 is element 0 of %vec0.
1024define void @v_shuffle_v2bf16_v4bf16__6_0(ptr addrspace(1) inreg %ptr) { 1025; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_0: 1026; GFX900: ; %bb.0: 1027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1028; GFX900-NEXT: ;;#ASMSTART 1029; GFX900-NEXT: ; def v[0:1] 1030; GFX900-NEXT: ;;#ASMEND 1031; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1032; GFX900-NEXT: v_mov_b32_e32 v3, 0 1033; GFX900-NEXT: ;;#ASMSTART 1034; GFX900-NEXT: ; def v[1:2] 1035; GFX900-NEXT: ;;#ASMEND 1036; GFX900-NEXT: v_perm_b32 v0, v0, v2, s4 1037; GFX900-NEXT: global_store_dword 
v3, v0, s[16:17] 1038; GFX900-NEXT: s_waitcnt vmcnt(0) 1039; GFX900-NEXT: s_setpc_b64 s[30:31] 1040; 1041; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_0: 1042; GFX90A: ; %bb.0: 1043; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1044; GFX90A-NEXT: ;;#ASMSTART 1045; GFX90A-NEXT: ; def v[0:1] 1046; GFX90A-NEXT: ;;#ASMEND 1047; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1048; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1049; GFX90A-NEXT: ;;#ASMSTART 1050; GFX90A-NEXT: ; def v[2:3] 1051; GFX90A-NEXT: ;;#ASMEND 1052; GFX90A-NEXT: v_perm_b32 v0, v0, v3, s4 1053; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1054; GFX90A-NEXT: s_waitcnt vmcnt(0) 1055; GFX90A-NEXT: s_setpc_b64 s[30:31] 1056; 1057; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_0: 1058; GFX940: ; %bb.0: 1059; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1060; GFX940-NEXT: ;;#ASMSTART 1061; GFX940-NEXT: ; def v[0:1] 1062; GFX940-NEXT: ;;#ASMEND 1063; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1064; GFX940-NEXT: v_mov_b32_e32 v4, 0 1065; GFX940-NEXT: ;;#ASMSTART 1066; GFX940-NEXT: ; def v[2:3] 1067; GFX940-NEXT: ;;#ASMEND 1068; GFX940-NEXT: s_nop 0 1069; GFX940-NEXT: v_perm_b32 v0, v0, v3, s2 1070; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1071; GFX940-NEXT: s_waitcnt vmcnt(0) 1072; GFX940-NEXT: s_setpc_b64 s[30:31] 1073 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1074 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 1075 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 0> 1076 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1077 ret void 1078} 1079
; Test case: shufflevector of %vec0 and poison with mask <poison, 1>; only result lane 1 (element 1 of %vec0) is defined.
1080define void @v_shuffle_v2bf16_v4bf16__u_1(ptr addrspace(1) inreg %ptr) { 1081; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_1: 1082; GFX900: ; %bb.0: 1083; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1084; GFX900-NEXT: v_mov_b32_e32 v2, 0 1085; GFX900-NEXT: ;;#ASMSTART 1086; GFX900-NEXT: ; def v[0:1] 1087; GFX900-NEXT: ;;#ASMEND 1088; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1089; GFX900-NEXT: 
s_waitcnt vmcnt(0) 1090; GFX900-NEXT: s_setpc_b64 s[30:31] 1091; 1092; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_1: 1093; GFX90A: ; %bb.0: 1094; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1095; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1096; GFX90A-NEXT: ;;#ASMSTART 1097; GFX90A-NEXT: ; def v[0:1] 1098; GFX90A-NEXT: ;;#ASMEND 1099; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1100; GFX90A-NEXT: s_waitcnt vmcnt(0) 1101; GFX90A-NEXT: s_setpc_b64 s[30:31] 1102; 1103; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_1: 1104; GFX940: ; %bb.0: 1105; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1106; GFX940-NEXT: v_mov_b32_e32 v2, 0 1107; GFX940-NEXT: ;;#ASMSTART 1108; GFX940-NEXT: ; def v[0:1] 1109; GFX940-NEXT: ;;#ASMEND 1110; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1111; GFX940-NEXT: s_waitcnt vmcnt(0) 1112; GFX940-NEXT: s_setpc_b64 s[30:31] 1113 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1114 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 1> 1115 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1116 ret void 1117} 1118
; Test case: shufflevector of %vec0 and poison with mask <0, 1> (the low two elements of %vec0, in order); the <2 x bfloat> result is stored to %ptr.
1119define void @v_shuffle_v2bf16_v4bf16__0_1(ptr addrspace(1) inreg %ptr) { 1120; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_1: 1121; GFX900: ; %bb.0: 1122; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1123; GFX900-NEXT: v_mov_b32_e32 v2, 0 1124; GFX900-NEXT: ;;#ASMSTART 1125; GFX900-NEXT: ; def v[0:1] 1126; GFX900-NEXT: ;;#ASMEND 1127; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1128; GFX900-NEXT: s_waitcnt vmcnt(0) 1129; GFX900-NEXT: s_setpc_b64 s[30:31] 1130; 1131; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_1: 1132; GFX90A: ; %bb.0: 1133; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1134; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1135; GFX90A-NEXT: ;;#ASMSTART 1136; GFX90A-NEXT: ; def v[0:1] 1137; GFX90A-NEXT: ;;#ASMEND 1138; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1139; GFX90A-NEXT: s_waitcnt vmcnt(0) 1140; GFX90A-NEXT: s_setpc_b64 s[30:31] 1141; 1142; 
GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_1: 1143; GFX940: ; %bb.0: 1144; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1145; GFX940-NEXT: v_mov_b32_e32 v2, 0 1146; GFX940-NEXT: ;;#ASMSTART 1147; GFX940-NEXT: ; def v[0:1] 1148; GFX940-NEXT: ;;#ASMEND 1149; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1150; GFX940-NEXT: s_waitcnt vmcnt(0) 1151; GFX940-NEXT: s_setpc_b64 s[30:31] 1152 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1153 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1> 1154 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1155 ret void 1156} 1157
; Test case: shufflevector of %vec0 and poison with mask <1, 1> (element 1 of %vec0 in both lanes); the <2 x bfloat> result is stored to %ptr.
1158define void @v_shuffle_v2bf16_v4bf16__1_1(ptr addrspace(1) inreg %ptr) { 1159; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_1: 1160; GFX900: ; %bb.0: 1161; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1162; GFX900-NEXT: ;;#ASMSTART 1163; GFX900-NEXT: ; def v[0:1] 1164; GFX900-NEXT: ;;#ASMEND 1165; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1166; GFX900-NEXT: v_mov_b32_e32 v2, 0 1167; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 1168; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1169; GFX900-NEXT: s_waitcnt vmcnt(0) 1170; GFX900-NEXT: s_setpc_b64 s[30:31] 1171; 1172; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_1: 1173; GFX90A: ; %bb.0: 1174; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1175; GFX90A-NEXT: ;;#ASMSTART 1176; GFX90A-NEXT: ; def v[0:1] 1177; GFX90A-NEXT: ;;#ASMEND 1178; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1179; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1180; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 1181; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1182; GFX90A-NEXT: s_waitcnt vmcnt(0) 1183; GFX90A-NEXT: s_setpc_b64 s[30:31] 1184; 1185; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_1: 1186; GFX940: ; %bb.0: 1187; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1188; GFX940-NEXT: ;;#ASMSTART 1189; GFX940-NEXT: ; def v[0:1] 1190; GFX940-NEXT: ;;#ASMEND 1191; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1192; GFX940-NEXT: v_mov_b32_e32 v2, 0 1193; 
GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 1194; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1195; GFX940-NEXT: s_waitcnt vmcnt(0) 1196; GFX940-NEXT: s_setpc_b64 s[30:31] 1197 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1198 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 1> 1199 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1200 ret void 1201} 1202
; Test case: shufflevector of %vec0 and poison with mask <2, 1> (elements 2 and 1 of %vec0); the <2 x bfloat> result is stored to %ptr.
1203define void @v_shuffle_v2bf16_v4bf16__2_1(ptr addrspace(1) inreg %ptr) { 1204; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_1: 1205; GFX900: ; %bb.0: 1206; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1207; GFX900-NEXT: ;;#ASMSTART 1208; GFX900-NEXT: ; def v[0:1] 1209; GFX900-NEXT: ;;#ASMEND 1210; GFX900-NEXT: s_mov_b32 s4, 0xffff 1211; GFX900-NEXT: v_mov_b32_e32 v2, 0 1212; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 1213; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1214; GFX900-NEXT: s_waitcnt vmcnt(0) 1215; GFX900-NEXT: s_setpc_b64 s[30:31] 1216; 1217; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_1: 1218; GFX90A: ; %bb.0: 1219; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1220; GFX90A-NEXT: ;;#ASMSTART 1221; GFX90A-NEXT: ; def v[0:1] 1222; GFX90A-NEXT: ;;#ASMEND 1223; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1224; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1225; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 1226; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1227; GFX90A-NEXT: s_waitcnt vmcnt(0) 1228; GFX90A-NEXT: s_setpc_b64 s[30:31] 1229; 1230; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_1: 1231; GFX940: ; %bb.0: 1232; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1233; GFX940-NEXT: ;;#ASMSTART 1234; GFX940-NEXT: ; def v[0:1] 1235; GFX940-NEXT: ;;#ASMEND 1236; GFX940-NEXT: s_mov_b32 s2, 0xffff 1237; GFX940-NEXT: v_mov_b32_e32 v2, 0 1238; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 1239; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1240; GFX940-NEXT: s_waitcnt vmcnt(0) 1241; GFX940-NEXT: s_setpc_b64 s[30:31] 1242 %vec0 = call <4 x bfloat> asm "; def $0", 
"=v"() 1243 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 1> 1244 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1245 ret void 1246} 1247
; Test case: shufflevector of %vec0 and poison with mask <3, 1> (elements 3 and 1 of %vec0); the <2 x bfloat> result is stored to %ptr.
1248define void @v_shuffle_v2bf16_v4bf16__3_1(ptr addrspace(1) inreg %ptr) { 1249; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_1: 1250; GFX900: ; %bb.0: 1251; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1252; GFX900-NEXT: ;;#ASMSTART 1253; GFX900-NEXT: ; def v[0:1] 1254; GFX900-NEXT: ;;#ASMEND 1255; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1256; GFX900-NEXT: v_mov_b32_e32 v2, 0 1257; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 1258; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1259; GFX900-NEXT: s_waitcnt vmcnt(0) 1260; GFX900-NEXT: s_setpc_b64 s[30:31] 1261; 1262; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_1: 1263; GFX90A: ; %bb.0: 1264; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1265; GFX90A-NEXT: ;;#ASMSTART 1266; GFX90A-NEXT: ; def v[0:1] 1267; GFX90A-NEXT: ;;#ASMEND 1268; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1269; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1270; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 1271; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1272; GFX90A-NEXT: s_waitcnt vmcnt(0) 1273; GFX90A-NEXT: s_setpc_b64 s[30:31] 1274; 1275; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_1: 1276; GFX940: ; %bb.0: 1277; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1278; GFX940-NEXT: ;;#ASMSTART 1279; GFX940-NEXT: ; def v[0:1] 1280; GFX940-NEXT: ;;#ASMEND 1281; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1282; GFX940-NEXT: v_mov_b32_e32 v2, 0 1283; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 1284; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1285; GFX940-NEXT: s_waitcnt vmcnt(0) 1286; GFX940-NEXT: s_setpc_b64 s[30:31] 1287 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1288 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 1> 1289 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1290 ret void 1291} 1292
; Test case: shufflevector of %vec0 and poison with mask <4, 1>; index 4 addresses the poison second operand, so only result lane 1 (element 1 of %vec0) is defined.
1293define void 
@v_shuffle_v2bf16_v4bf16__4_1(ptr addrspace(1) inreg %ptr) { 1294; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_1: 1295; GFX900: ; %bb.0: 1296; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1297; GFX900-NEXT: v_mov_b32_e32 v2, 0 1298; GFX900-NEXT: ;;#ASMSTART 1299; GFX900-NEXT: ; def v[0:1] 1300; GFX900-NEXT: ;;#ASMEND 1301; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1302; GFX900-NEXT: s_waitcnt vmcnt(0) 1303; GFX900-NEXT: s_setpc_b64 s[30:31] 1304; 1305; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_1: 1306; GFX90A: ; %bb.0: 1307; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1308; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1309; GFX90A-NEXT: ;;#ASMSTART 1310; GFX90A-NEXT: ; def v[0:1] 1311; GFX90A-NEXT: ;;#ASMEND 1312; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1313; GFX90A-NEXT: s_waitcnt vmcnt(0) 1314; GFX90A-NEXT: s_setpc_b64 s[30:31] 1315; 1316; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_1: 1317; GFX940: ; %bb.0: 1318; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1319; GFX940-NEXT: v_mov_b32_e32 v2, 0 1320; GFX940-NEXT: ;;#ASMSTART 1321; GFX940-NEXT: ; def v[0:1] 1322; GFX940-NEXT: ;;#ASMEND 1323; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1324; GFX940-NEXT: s_waitcnt vmcnt(0) 1325; GFX940-NEXT: s_setpc_b64 s[30:31] 1326 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1327 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 1> 1328 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1329 ret void 1330} 1331
; Test case: shufflevector of %vec0 and %vec1 with mask <5, 1>; lane 0 is element 1 of %vec1 (index 5), lane 1 is element 1 of %vec0.
1332define void @v_shuffle_v2bf16_v4bf16__5_1(ptr addrspace(1) inreg %ptr) { 1333; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_1: 1334; GFX900: ; %bb.0: 1335; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1336; GFX900-NEXT: ;;#ASMSTART 1337; GFX900-NEXT: ; def v[0:1] 1338; GFX900-NEXT: ;;#ASMEND 1339; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1340; GFX900-NEXT: v_mov_b32_e32 v3, 0 1341; GFX900-NEXT: ;;#ASMSTART 1342; GFX900-NEXT: ; def v[1:2] 1343; GFX900-NEXT: ;;#ASMEND 1344; GFX900-NEXT: 
v_perm_b32 v0, v0, v1, s4 1345; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1346; GFX900-NEXT: s_waitcnt vmcnt(0) 1347; GFX900-NEXT: s_setpc_b64 s[30:31] 1348; 1349; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_1: 1350; GFX90A: ; %bb.0: 1351; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1352; GFX90A-NEXT: ;;#ASMSTART 1353; GFX90A-NEXT: ; def v[0:1] 1354; GFX90A-NEXT: ;;#ASMEND 1355; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1356; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1357; GFX90A-NEXT: ;;#ASMSTART 1358; GFX90A-NEXT: ; def v[2:3] 1359; GFX90A-NEXT: ;;#ASMEND 1360; GFX90A-NEXT: v_perm_b32 v0, v0, v2, s4 1361; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1362; GFX90A-NEXT: s_waitcnt vmcnt(0) 1363; GFX90A-NEXT: s_setpc_b64 s[30:31] 1364; 1365; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_1: 1366; GFX940: ; %bb.0: 1367; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1368; GFX940-NEXT: ;;#ASMSTART 1369; GFX940-NEXT: ; def v[0:1] 1370; GFX940-NEXT: ;;#ASMEND 1371; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1372; GFX940-NEXT: v_mov_b32_e32 v4, 0 1373; GFX940-NEXT: ;;#ASMSTART 1374; GFX940-NEXT: ; def v[2:3] 1375; GFX940-NEXT: ;;#ASMEND 1376; GFX940-NEXT: s_nop 0 1377; GFX940-NEXT: v_perm_b32 v0, v0, v2, s2 1378; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1379; GFX940-NEXT: s_waitcnt vmcnt(0) 1380; GFX940-NEXT: s_setpc_b64 s[30:31] 1381 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1382 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 1383 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 1> 1384 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1385 ret void 1386} 1387
; Test case: shufflevector of %vec0 and %vec1 with mask <6, 1>; lane 0 is element 2 of %vec1 (index 6), lane 1 is element 1 of %vec0.
1388define void @v_shuffle_v2bf16_v4bf16__6_1(ptr addrspace(1) inreg %ptr) { 1389; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_1: 1390; GFX900: ; %bb.0: 1391; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1392; GFX900-NEXT: ;;#ASMSTART 1393; GFX900-NEXT: ; def v[0:1] 1394; GFX900-NEXT: ;;#ASMEND 1395; GFX900-NEXT: s_mov_b32 s4, 0xffff 1396; 
GFX900-NEXT: v_mov_b32_e32 v3, 0 1397; GFX900-NEXT: ;;#ASMSTART 1398; GFX900-NEXT: ; def v[1:2] 1399; GFX900-NEXT: ;;#ASMEND 1400; GFX900-NEXT: v_bfi_b32 v0, s4, v2, v0 1401; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 1402; GFX900-NEXT: s_waitcnt vmcnt(0) 1403; GFX900-NEXT: s_setpc_b64 s[30:31] 1404; 1405; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_1: 1406; GFX90A: ; %bb.0: 1407; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1408; GFX90A-NEXT: ;;#ASMSTART 1409; GFX90A-NEXT: ; def v[0:1] 1410; GFX90A-NEXT: ;;#ASMEND 1411; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1412; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1413; GFX90A-NEXT: ;;#ASMSTART 1414; GFX90A-NEXT: ; def v[2:3] 1415; GFX90A-NEXT: ;;#ASMEND 1416; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v0 1417; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1418; GFX90A-NEXT: s_waitcnt vmcnt(0) 1419; GFX90A-NEXT: s_setpc_b64 s[30:31] 1420; 1421; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_1: 1422; GFX940: ; %bb.0: 1423; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1424; GFX940-NEXT: ;;#ASMSTART 1425; GFX940-NEXT: ; def v[0:1] 1426; GFX940-NEXT: ;;#ASMEND 1427; GFX940-NEXT: s_mov_b32 s2, 0xffff 1428; GFX940-NEXT: v_mov_b32_e32 v4, 0 1429; GFX940-NEXT: ;;#ASMSTART 1430; GFX940-NEXT: ; def v[2:3] 1431; GFX940-NEXT: ;;#ASMEND 1432; GFX940-NEXT: s_nop 0 1433; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v0 1434; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1435; GFX940-NEXT: s_waitcnt vmcnt(0) 1436; GFX940-NEXT: s_setpc_b64 s[30:31] 1437 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1438 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 1439 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 1> 1440 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1441 ret void 1442} 1443
; Test case: shufflevector of %vec0 and poison with mask <poison, 2>; only result lane 1 (element 2 of %vec0) is defined.
1444define void @v_shuffle_v2bf16_v4bf16__u_2(ptr addrspace(1) inreg %ptr) { 1445; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_2: 1446; GFX900: ; %bb.0: 1447; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1448; 
GFX900-NEXT: ;;#ASMSTART 1449; GFX900-NEXT: ; def v[0:1] 1450; GFX900-NEXT: ;;#ASMEND 1451; GFX900-NEXT: v_mov_b32_e32 v2, 0 1452; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1453; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1454; GFX900-NEXT: s_waitcnt vmcnt(0) 1455; GFX900-NEXT: s_setpc_b64 s[30:31] 1456; 1457; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_2: 1458; GFX90A: ; %bb.0: 1459; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1460; GFX90A-NEXT: ;;#ASMSTART 1461; GFX90A-NEXT: ; def v[0:1] 1462; GFX90A-NEXT: ;;#ASMEND 1463; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1464; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1465; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1466; GFX90A-NEXT: s_waitcnt vmcnt(0) 1467; GFX90A-NEXT: s_setpc_b64 s[30:31] 1468; 1469; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_2: 1470; GFX940: ; %bb.0: 1471; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1472; GFX940-NEXT: ;;#ASMSTART 1473; GFX940-NEXT: ; def v[0:1] 1474; GFX940-NEXT: ;;#ASMEND 1475; GFX940-NEXT: v_mov_b32_e32 v2, 0 1476; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1477; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1478; GFX940-NEXT: s_waitcnt vmcnt(0) 1479; GFX940-NEXT: s_setpc_b64 s[30:31] 1480 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1481 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 2> 1482 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1483 ret void 1484} 1485
; Test case: shufflevector of %vec0 and poison with mask <0, 2> (elements 0 and 2 of %vec0); the <2 x bfloat> result is stored to %ptr.
1486define void @v_shuffle_v2bf16_v4bf16__0_2(ptr addrspace(1) inreg %ptr) { 1487; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_2: 1488; GFX900: ; %bb.0: 1489; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1490; GFX900-NEXT: ;;#ASMSTART 1491; GFX900-NEXT: ; def v[0:1] 1492; GFX900-NEXT: ;;#ASMEND 1493; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1494; GFX900-NEXT: v_mov_b32_e32 v2, 0 1495; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 1496; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1497; GFX900-NEXT: s_waitcnt vmcnt(0) 1498; GFX900-NEXT: 
s_setpc_b64 s[30:31] 1499; 1500; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_2: 1501; GFX90A: ; %bb.0: 1502; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1503; GFX90A-NEXT: ;;#ASMSTART 1504; GFX90A-NEXT: ; def v[0:1] 1505; GFX90A-NEXT: ;;#ASMEND 1506; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1507; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1508; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 1509; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1510; GFX90A-NEXT: s_waitcnt vmcnt(0) 1511; GFX90A-NEXT: s_setpc_b64 s[30:31] 1512; 1513; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_2: 1514; GFX940: ; %bb.0: 1515; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1516; GFX940-NEXT: ;;#ASMSTART 1517; GFX940-NEXT: ; def v[0:1] 1518; GFX940-NEXT: ;;#ASMEND 1519; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1520; GFX940-NEXT: v_mov_b32_e32 v2, 0 1521; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 1522; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1523; GFX940-NEXT: s_waitcnt vmcnt(0) 1524; GFX940-NEXT: s_setpc_b64 s[30:31] 1525 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1526 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 2> 1527 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1528 ret void 1529} 1530
; Test case: shufflevector of %vec0 and poison with mask <1, 2> (elements 1 and 2 of %vec0); the <2 x bfloat> result is stored to %ptr.
1531define void @v_shuffle_v2bf16_v4bf16__1_2(ptr addrspace(1) inreg %ptr) { 1532; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_2: 1533; GFX900: ; %bb.0: 1534; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1535; GFX900-NEXT: ;;#ASMSTART 1536; GFX900-NEXT: ; def v[0:1] 1537; GFX900-NEXT: ;;#ASMEND 1538; GFX900-NEXT: v_mov_b32_e32 v2, 0 1539; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 1540; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1541; GFX900-NEXT: s_waitcnt vmcnt(0) 1542; GFX900-NEXT: s_setpc_b64 s[30:31] 1543; 1544; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_2: 1545; GFX90A: ; %bb.0: 1546; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1547; GFX90A-NEXT: ;;#ASMSTART 1548; GFX90A-NEXT: ; def v[0:1] 1549; GFX90A-NEXT: ;;#ASMEND 1550; 
GFX90A-NEXT: v_mov_b32_e32 v2, 0 1551; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 1552; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1553; GFX90A-NEXT: s_waitcnt vmcnt(0) 1554; GFX90A-NEXT: s_setpc_b64 s[30:31] 1555; 1556; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_2: 1557; GFX940: ; %bb.0: 1558; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1559; GFX940-NEXT: ;;#ASMSTART 1560; GFX940-NEXT: ; def v[0:1] 1561; GFX940-NEXT: ;;#ASMEND 1562; GFX940-NEXT: v_mov_b32_e32 v2, 0 1563; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 1564; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1565; GFX940-NEXT: s_waitcnt vmcnt(0) 1566; GFX940-NEXT: s_setpc_b64 s[30:31] 1567 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1568 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 2> 1569 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1570 ret void 1571} 1572
; Test case: shufflevector of %vec0 and poison with mask <2, 2> (element 2 of %vec0 in both lanes); the <2 x bfloat> result is stored to %ptr.
1573define void @v_shuffle_v2bf16_v4bf16__2_2(ptr addrspace(1) inreg %ptr) { 1574; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_2: 1575; GFX900: ; %bb.0: 1576; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1577; GFX900-NEXT: ;;#ASMSTART 1578; GFX900-NEXT: ; def v[0:1] 1579; GFX900-NEXT: ;;#ASMEND 1580; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1581; GFX900-NEXT: v_mov_b32_e32 v2, 0 1582; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1583; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1584; GFX900-NEXT: s_waitcnt vmcnt(0) 1585; GFX900-NEXT: s_setpc_b64 s[30:31] 1586; 1587; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_2: 1588; GFX90A: ; %bb.0: 1589; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1590; GFX90A-NEXT: ;;#ASMSTART 1591; GFX90A-NEXT: ; def v[0:1] 1592; GFX90A-NEXT: ;;#ASMEND 1593; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1594; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1595; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 1596; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1597; GFX90A-NEXT: s_waitcnt vmcnt(0) 1598; GFX90A-NEXT: s_setpc_b64 s[30:31] 1599; 1600; GFX940-LABEL: 
v_shuffle_v2bf16_v4bf16__2_2: 1601; GFX940: ; %bb.0: 1602; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1603; GFX940-NEXT: ;;#ASMSTART 1604; GFX940-NEXT: ; def v[0:1] 1605; GFX940-NEXT: ;;#ASMEND 1606; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1607; GFX940-NEXT: v_mov_b32_e32 v2, 0 1608; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 1609; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1610; GFX940-NEXT: s_waitcnt vmcnt(0) 1611; GFX940-NEXT: s_setpc_b64 s[30:31] 1612 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1613 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 2> 1614 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1615 ret void 1616} 1617
; Test case: shufflevector of %vec0 and poison with mask <3, 2> (elements 3 and 2 of %vec0, swapped); the <2 x bfloat> result is stored to %ptr.
1618define void @v_shuffle_v2bf16_v4bf16__3_2(ptr addrspace(1) inreg %ptr) { 1619; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_2: 1620; GFX900: ; %bb.0: 1621; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1622; GFX900-NEXT: ;;#ASMSTART 1623; GFX900-NEXT: ; def v[0:1] 1624; GFX900-NEXT: ;;#ASMEND 1625; GFX900-NEXT: v_mov_b32_e32 v2, 0 1626; GFX900-NEXT: v_alignbit_b32 v0, v1, v1, 16 1627; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1628; GFX900-NEXT: s_waitcnt vmcnt(0) 1629; GFX900-NEXT: s_setpc_b64 s[30:31] 1630; 1631; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_2: 1632; GFX90A: ; %bb.0: 1633; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1634; GFX90A-NEXT: ;;#ASMSTART 1635; GFX90A-NEXT: ; def v[0:1] 1636; GFX90A-NEXT: ;;#ASMEND 1637; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1638; GFX90A-NEXT: v_alignbit_b32 v0, v1, v1, 16 1639; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1640; GFX90A-NEXT: s_waitcnt vmcnt(0) 1641; GFX90A-NEXT: s_setpc_b64 s[30:31] 1642; 1643; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_2: 1644; GFX940: ; %bb.0: 1645; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1646; GFX940-NEXT: ;;#ASMSTART 1647; GFX940-NEXT: ; def v[0:1] 1648; GFX940-NEXT: ;;#ASMEND 1649; GFX940-NEXT: v_mov_b32_e32 v2, 0 1650; GFX940-NEXT: v_alignbit_b32 v0, v1, v1, 16 
1651; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1652; GFX940-NEXT: s_waitcnt vmcnt(0) 1653; GFX940-NEXT: s_setpc_b64 s[30:31] 1654 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1655 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 2> 1656 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1657 ret void 1658} 1659
; Test case: shufflevector of %vec0 and poison with mask <4, 2>; index 4 addresses the poison second operand, so only result lane 1 (element 2 of %vec0) is defined.
1660define void @v_shuffle_v2bf16_v4bf16__4_2(ptr addrspace(1) inreg %ptr) { 1661; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_2: 1662; GFX900: ; %bb.0: 1663; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1664; GFX900-NEXT: ;;#ASMSTART 1665; GFX900-NEXT: ; def v[0:1] 1666; GFX900-NEXT: ;;#ASMEND 1667; GFX900-NEXT: v_mov_b32_e32 v2, 0 1668; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1669; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1670; GFX900-NEXT: s_waitcnt vmcnt(0) 1671; GFX900-NEXT: s_setpc_b64 s[30:31] 1672; 1673; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_2: 1674; GFX90A: ; %bb.0: 1675; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1676; GFX90A-NEXT: ;;#ASMSTART 1677; GFX90A-NEXT: ; def v[0:1] 1678; GFX90A-NEXT: ;;#ASMEND 1679; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1680; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1681; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1682; GFX90A-NEXT: s_waitcnt vmcnt(0) 1683; GFX90A-NEXT: s_setpc_b64 s[30:31] 1684; 1685; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_2: 1686; GFX940: ; %bb.0: 1687; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1688; GFX940-NEXT: ;;#ASMSTART 1689; GFX940-NEXT: ; def v[0:1] 1690; GFX940-NEXT: ;;#ASMEND 1691; GFX940-NEXT: v_mov_b32_e32 v2, 0 1692; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 1693; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1694; GFX940-NEXT: s_waitcnt vmcnt(0) 1695; GFX940-NEXT: s_setpc_b64 s[30:31] 1696 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1697 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 2> 1698 store <2 x bfloat> %shuf, ptr addrspace(1) 
%ptr, align 4 1699 ret void 1700} 1701
; Test case: shufflevector of %vec0 and %vec1 with mask <5, 2>; lane 0 is element 1 of %vec1 (index 5), lane 1 is element 2 of %vec0.
1702define void @v_shuffle_v2bf16_v4bf16__5_2(ptr addrspace(1) inreg %ptr) { 1703; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_2: 1704; GFX900: ; %bb.0: 1705; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1706; GFX900-NEXT: ;;#ASMSTART 1707; GFX900-NEXT: ; def v[0:1] 1708; GFX900-NEXT: ;;#ASMEND 1709; GFX900-NEXT: v_mov_b32_e32 v4, 0 1710; GFX900-NEXT: ;;#ASMSTART 1711; GFX900-NEXT: ; def v[2:3] 1712; GFX900-NEXT: ;;#ASMEND 1713; GFX900-NEXT: v_alignbit_b32 v0, v1, v2, 16 1714; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1715; GFX900-NEXT: s_waitcnt vmcnt(0) 1716; GFX900-NEXT: s_setpc_b64 s[30:31] 1717; 1718; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_2: 1719; GFX90A: ; %bb.0: 1720; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1721; GFX90A-NEXT: ;;#ASMSTART 1722; GFX90A-NEXT: ; def v[0:1] 1723; GFX90A-NEXT: ;;#ASMEND 1724; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1725; GFX90A-NEXT: ;;#ASMSTART 1726; GFX90A-NEXT: ; def v[2:3] 1727; GFX90A-NEXT: ;;#ASMEND 1728; GFX90A-NEXT: v_alignbit_b32 v0, v1, v2, 16 1729; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1730; GFX90A-NEXT: s_waitcnt vmcnt(0) 1731; GFX90A-NEXT: s_setpc_b64 s[30:31] 1732; 1733; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_2: 1734; GFX940: ; %bb.0: 1735; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1736; GFX940-NEXT: ;;#ASMSTART 1737; GFX940-NEXT: ; def v[0:1] 1738; GFX940-NEXT: ;;#ASMEND 1739; GFX940-NEXT: v_mov_b32_e32 v4, 0 1740; GFX940-NEXT: ;;#ASMSTART 1741; GFX940-NEXT: ; def v[2:3] 1742; GFX940-NEXT: ;;#ASMEND 1743; GFX940-NEXT: s_nop 0 1744; GFX940-NEXT: v_alignbit_b32 v0, v1, v2, 16 1745; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1746; GFX940-NEXT: s_waitcnt vmcnt(0) 1747; GFX940-NEXT: s_setpc_b64 s[30:31] 1748 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1749 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 1750 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 2> 1751 store <2 x bfloat> 
%shuf, ptr addrspace(1) %ptr, align 4 1752 ret void 1753} 1754 1755define void @v_shuffle_v2bf16_v4bf16__6_2(ptr addrspace(1) inreg %ptr) { 1756; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_2: 1757; GFX900: ; %bb.0: 1758; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1759; GFX900-NEXT: ;;#ASMSTART 1760; GFX900-NEXT: ; def v[0:1] 1761; GFX900-NEXT: ;;#ASMEND 1762; GFX900-NEXT: s_mov_b32 s4, 0x5040100 1763; GFX900-NEXT: v_mov_b32_e32 v4, 0 1764; GFX900-NEXT: ;;#ASMSTART 1765; GFX900-NEXT: ; def v[2:3] 1766; GFX900-NEXT: ;;#ASMEND 1767; GFX900-NEXT: v_perm_b32 v0, v1, v3, s4 1768; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 1769; GFX900-NEXT: s_waitcnt vmcnt(0) 1770; GFX900-NEXT: s_setpc_b64 s[30:31] 1771; 1772; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_2: 1773; GFX90A: ; %bb.0: 1774; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1775; GFX90A-NEXT: ;;#ASMSTART 1776; GFX90A-NEXT: ; def v[0:1] 1777; GFX90A-NEXT: ;;#ASMEND 1778; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 1779; GFX90A-NEXT: v_mov_b32_e32 v4, 0 1780; GFX90A-NEXT: ;;#ASMSTART 1781; GFX90A-NEXT: ; def v[2:3] 1782; GFX90A-NEXT: ;;#ASMEND 1783; GFX90A-NEXT: v_perm_b32 v0, v1, v3, s4 1784; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 1785; GFX90A-NEXT: s_waitcnt vmcnt(0) 1786; GFX90A-NEXT: s_setpc_b64 s[30:31] 1787; 1788; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_2: 1789; GFX940: ; %bb.0: 1790; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1791; GFX940-NEXT: ;;#ASMSTART 1792; GFX940-NEXT: ; def v[0:1] 1793; GFX940-NEXT: ;;#ASMEND 1794; GFX940-NEXT: s_mov_b32 s2, 0x5040100 1795; GFX940-NEXT: v_mov_b32_e32 v4, 0 1796; GFX940-NEXT: ;;#ASMSTART 1797; GFX940-NEXT: ; def v[2:3] 1798; GFX940-NEXT: ;;#ASMEND 1799; GFX940-NEXT: s_nop 0 1800; GFX940-NEXT: v_perm_b32 v0, v1, v3, s2 1801; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 1802; GFX940-NEXT: s_waitcnt vmcnt(0) 1803; GFX940-NEXT: s_setpc_b64 s[30:31] 1804 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1805 %vec1 = call <4 x bfloat> 
asm "; def $0", "=v"() 1806 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 2> 1807 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1808 ret void 1809} 1810 1811define void @v_shuffle_v2bf16_v4bf16__u_3(ptr addrspace(1) inreg %ptr) { 1812; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_3: 1813; GFX900: ; %bb.0: 1814; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1815; GFX900-NEXT: v_mov_b32_e32 v2, 0 1816; GFX900-NEXT: ;;#ASMSTART 1817; GFX900-NEXT: ; def v[0:1] 1818; GFX900-NEXT: ;;#ASMEND 1819; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1820; GFX900-NEXT: s_waitcnt vmcnt(0) 1821; GFX900-NEXT: s_setpc_b64 s[30:31] 1822; 1823; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_3: 1824; GFX90A: ; %bb.0: 1825; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1826; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1827; GFX90A-NEXT: ;;#ASMSTART 1828; GFX90A-NEXT: ; def v[0:1] 1829; GFX90A-NEXT: ;;#ASMEND 1830; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1831; GFX90A-NEXT: s_waitcnt vmcnt(0) 1832; GFX90A-NEXT: s_setpc_b64 s[30:31] 1833; 1834; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_3: 1835; GFX940: ; %bb.0: 1836; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1837; GFX940-NEXT: v_mov_b32_e32 v2, 0 1838; GFX940-NEXT: ;;#ASMSTART 1839; GFX940-NEXT: ; def v[0:1] 1840; GFX940-NEXT: ;;#ASMEND 1841; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1842; GFX940-NEXT: s_waitcnt vmcnt(0) 1843; GFX940-NEXT: s_setpc_b64 s[30:31] 1844 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1845 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 3> 1846 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1847 ret void 1848} 1849 1850define void @v_shuffle_v2bf16_v4bf16__0_3(ptr addrspace(1) inreg %ptr) { 1851; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_3: 1852; GFX900: ; %bb.0: 1853; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1854; GFX900-NEXT: ;;#ASMSTART 1855; GFX900-NEXT: ; def v[0:1] 
1856; GFX900-NEXT: ;;#ASMEND 1857; GFX900-NEXT: s_mov_b32 s4, 0xffff 1858; GFX900-NEXT: v_mov_b32_e32 v2, 0 1859; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 1860; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1861; GFX900-NEXT: s_waitcnt vmcnt(0) 1862; GFX900-NEXT: s_setpc_b64 s[30:31] 1863; 1864; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_3: 1865; GFX90A: ; %bb.0: 1866; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1867; GFX90A-NEXT: ;;#ASMSTART 1868; GFX90A-NEXT: ; def v[0:1] 1869; GFX90A-NEXT: ;;#ASMEND 1870; GFX90A-NEXT: s_mov_b32 s4, 0xffff 1871; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1872; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v1 1873; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1874; GFX90A-NEXT: s_waitcnt vmcnt(0) 1875; GFX90A-NEXT: s_setpc_b64 s[30:31] 1876; 1877; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_3: 1878; GFX940: ; %bb.0: 1879; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1880; GFX940-NEXT: ;;#ASMSTART 1881; GFX940-NEXT: ; def v[0:1] 1882; GFX940-NEXT: ;;#ASMEND 1883; GFX940-NEXT: s_mov_b32 s2, 0xffff 1884; GFX940-NEXT: v_mov_b32_e32 v2, 0 1885; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v1 1886; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1887; GFX940-NEXT: s_waitcnt vmcnt(0) 1888; GFX940-NEXT: s_setpc_b64 s[30:31] 1889 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1890 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 3> 1891 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1892 ret void 1893} 1894 1895define void @v_shuffle_v2bf16_v4bf16__1_3(ptr addrspace(1) inreg %ptr) { 1896; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_3: 1897; GFX900: ; %bb.0: 1898; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1899; GFX900-NEXT: ;;#ASMSTART 1900; GFX900-NEXT: ; def v[0:1] 1901; GFX900-NEXT: ;;#ASMEND 1902; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1903; GFX900-NEXT: v_mov_b32_e32 v2, 0 1904; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 1905; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1906; GFX900-NEXT: 
s_waitcnt vmcnt(0) 1907; GFX900-NEXT: s_setpc_b64 s[30:31] 1908; 1909; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_3: 1910; GFX90A: ; %bb.0: 1911; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1912; GFX90A-NEXT: ;;#ASMSTART 1913; GFX90A-NEXT: ; def v[0:1] 1914; GFX90A-NEXT: ;;#ASMEND 1915; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 1916; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1917; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 1918; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 1919; GFX90A-NEXT: s_waitcnt vmcnt(0) 1920; GFX90A-NEXT: s_setpc_b64 s[30:31] 1921; 1922; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_3: 1923; GFX940: ; %bb.0: 1924; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1925; GFX940-NEXT: ;;#ASMSTART 1926; GFX940-NEXT: ; def v[0:1] 1927; GFX940-NEXT: ;;#ASMEND 1928; GFX940-NEXT: s_mov_b32 s2, 0x7060302 1929; GFX940-NEXT: v_mov_b32_e32 v2, 0 1930; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 1931; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 1932; GFX940-NEXT: s_waitcnt vmcnt(0) 1933; GFX940-NEXT: s_setpc_b64 s[30:31] 1934 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1935 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 3> 1936 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1937 ret void 1938} 1939 1940define void @v_shuffle_v2bf16_v4bf16__2_3(ptr addrspace(1) inreg %ptr) { 1941; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_3: 1942; GFX900: ; %bb.0: 1943; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1944; GFX900-NEXT: v_mov_b32_e32 v2, 0 1945; GFX900-NEXT: ;;#ASMSTART 1946; GFX900-NEXT: ; def v[0:1] 1947; GFX900-NEXT: ;;#ASMEND 1948; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 1949; GFX900-NEXT: s_waitcnt vmcnt(0) 1950; GFX900-NEXT: s_setpc_b64 s[30:31] 1951; 1952; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_3: 1953; GFX90A: ; %bb.0: 1954; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1955; GFX90A-NEXT: v_mov_b32_e32 v2, 0 1956; GFX90A-NEXT: ;;#ASMSTART 1957; GFX90A-NEXT: ; def v[0:1] 1958; 
GFX90A-NEXT: ;;#ASMEND 1959; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 1960; GFX90A-NEXT: s_waitcnt vmcnt(0) 1961; GFX90A-NEXT: s_setpc_b64 s[30:31] 1962; 1963; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_3: 1964; GFX940: ; %bb.0: 1965; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1966; GFX940-NEXT: v_mov_b32_e32 v2, 0 1967; GFX940-NEXT: ;;#ASMSTART 1968; GFX940-NEXT: ; def v[0:1] 1969; GFX940-NEXT: ;;#ASMEND 1970; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 1971; GFX940-NEXT: s_waitcnt vmcnt(0) 1972; GFX940-NEXT: s_setpc_b64 s[30:31] 1973 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 1974 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3> 1975 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 1976 ret void 1977} 1978 1979define void @v_shuffle_v2bf16_v4bf16__3_3(ptr addrspace(1) inreg %ptr) { 1980; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_3: 1981; GFX900: ; %bb.0: 1982; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1983; GFX900-NEXT: ;;#ASMSTART 1984; GFX900-NEXT: ; def v[0:1] 1985; GFX900-NEXT: ;;#ASMEND 1986; GFX900-NEXT: s_mov_b32 s4, 0x7060302 1987; GFX900-NEXT: v_mov_b32_e32 v2, 0 1988; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 1989; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 1990; GFX900-NEXT: s_waitcnt vmcnt(0) 1991; GFX900-NEXT: s_setpc_b64 s[30:31] 1992; 1993; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_3: 1994; GFX90A: ; %bb.0: 1995; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 1996; GFX90A-NEXT: ;;#ASMSTART 1997; GFX90A-NEXT: ; def v[0:1] 1998; GFX90A-NEXT: ;;#ASMEND 1999; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2000; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2001; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 2002; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2003; GFX90A-NEXT: s_waitcnt vmcnt(0) 2004; GFX90A-NEXT: s_setpc_b64 s[30:31] 2005; 2006; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_3: 2007; GFX940: ; %bb.0: 2008; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2009; 
GFX940-NEXT: ;;#ASMSTART 2010; GFX940-NEXT: ; def v[0:1] 2011; GFX940-NEXT: ;;#ASMEND 2012; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2013; GFX940-NEXT: v_mov_b32_e32 v2, 0 2014; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 2015; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2016; GFX940-NEXT: s_waitcnt vmcnt(0) 2017; GFX940-NEXT: s_setpc_b64 s[30:31] 2018 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2019 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 3> 2020 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2021 ret void 2022} 2023 2024define void @v_shuffle_v2bf16_v4bf16__4_3(ptr addrspace(1) inreg %ptr) { 2025; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_3: 2026; GFX900: ; %bb.0: 2027; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2028; GFX900-NEXT: v_mov_b32_e32 v2, 0 2029; GFX900-NEXT: ;;#ASMSTART 2030; GFX900-NEXT: ; def v[0:1] 2031; GFX900-NEXT: ;;#ASMEND 2032; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2033; GFX900-NEXT: s_waitcnt vmcnt(0) 2034; GFX900-NEXT: s_setpc_b64 s[30:31] 2035; 2036; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_3: 2037; GFX90A: ; %bb.0: 2038; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2039; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2040; GFX90A-NEXT: ;;#ASMSTART 2041; GFX90A-NEXT: ; def v[0:1] 2042; GFX90A-NEXT: ;;#ASMEND 2043; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2044; GFX90A-NEXT: s_waitcnt vmcnt(0) 2045; GFX90A-NEXT: s_setpc_b64 s[30:31] 2046; 2047; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_3: 2048; GFX940: ; %bb.0: 2049; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2050; GFX940-NEXT: v_mov_b32_e32 v2, 0 2051; GFX940-NEXT: ;;#ASMSTART 2052; GFX940-NEXT: ; def v[0:1] 2053; GFX940-NEXT: ;;#ASMEND 2054; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2055; GFX940-NEXT: s_waitcnt vmcnt(0) 2056; GFX940-NEXT: s_setpc_b64 s[30:31] 2057 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2058 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x 
i32> <i32 4, i32 3> 2059 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2060 ret void 2061} 2062 2063define void @v_shuffle_v2bf16_v4bf16__5_3(ptr addrspace(1) inreg %ptr) { 2064; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_3: 2065; GFX900: ; %bb.0: 2066; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2067; GFX900-NEXT: ;;#ASMSTART 2068; GFX900-NEXT: ; def v[0:1] 2069; GFX900-NEXT: ;;#ASMEND 2070; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2071; GFX900-NEXT: v_mov_b32_e32 v4, 0 2072; GFX900-NEXT: ;;#ASMSTART 2073; GFX900-NEXT: ; def v[2:3] 2074; GFX900-NEXT: ;;#ASMEND 2075; GFX900-NEXT: v_perm_b32 v0, v1, v2, s4 2076; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2077; GFX900-NEXT: s_waitcnt vmcnt(0) 2078; GFX900-NEXT: s_setpc_b64 s[30:31] 2079; 2080; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_3: 2081; GFX90A: ; %bb.0: 2082; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2083; GFX90A-NEXT: ;;#ASMSTART 2084; GFX90A-NEXT: ; def v[0:1] 2085; GFX90A-NEXT: ;;#ASMEND 2086; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2087; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2088; GFX90A-NEXT: ;;#ASMSTART 2089; GFX90A-NEXT: ; def v[2:3] 2090; GFX90A-NEXT: ;;#ASMEND 2091; GFX90A-NEXT: v_perm_b32 v0, v1, v2, s4 2092; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2093; GFX90A-NEXT: s_waitcnt vmcnt(0) 2094; GFX90A-NEXT: s_setpc_b64 s[30:31] 2095; 2096; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_3: 2097; GFX940: ; %bb.0: 2098; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2099; GFX940-NEXT: ;;#ASMSTART 2100; GFX940-NEXT: ; def v[0:1] 2101; GFX940-NEXT: ;;#ASMEND 2102; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2103; GFX940-NEXT: v_mov_b32_e32 v4, 0 2104; GFX940-NEXT: ;;#ASMSTART 2105; GFX940-NEXT: ; def v[2:3] 2106; GFX940-NEXT: ;;#ASMEND 2107; GFX940-NEXT: s_nop 0 2108; GFX940-NEXT: v_perm_b32 v0, v1, v2, s2 2109; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2110; GFX940-NEXT: s_waitcnt vmcnt(0) 2111; GFX940-NEXT: s_setpc_b64 s[30:31] 2112 %vec0 = call <4 x bfloat> asm "; def 
$0", "=v"() 2113 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2114 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 3> 2115 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2116 ret void 2117} 2118 2119define void @v_shuffle_v2bf16_v4bf16__6_3(ptr addrspace(1) inreg %ptr) { 2120; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_3: 2121; GFX900: ; %bb.0: 2122; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2123; GFX900-NEXT: ;;#ASMSTART 2124; GFX900-NEXT: ; def v[0:1] 2125; GFX900-NEXT: ;;#ASMEND 2126; GFX900-NEXT: s_mov_b32 s4, 0xffff 2127; GFX900-NEXT: v_mov_b32_e32 v4, 0 2128; GFX900-NEXT: ;;#ASMSTART 2129; GFX900-NEXT: ; def v[2:3] 2130; GFX900-NEXT: ;;#ASMEND 2131; GFX900-NEXT: v_bfi_b32 v0, s4, v3, v1 2132; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2133; GFX900-NEXT: s_waitcnt vmcnt(0) 2134; GFX900-NEXT: s_setpc_b64 s[30:31] 2135; 2136; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_3: 2137; GFX90A: ; %bb.0: 2138; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2139; GFX90A-NEXT: ;;#ASMSTART 2140; GFX90A-NEXT: ; def v[0:1] 2141; GFX90A-NEXT: ;;#ASMEND 2142; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2143; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2144; GFX90A-NEXT: ;;#ASMSTART 2145; GFX90A-NEXT: ; def v[2:3] 2146; GFX90A-NEXT: ;;#ASMEND 2147; GFX90A-NEXT: v_bfi_b32 v0, s4, v3, v1 2148; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2149; GFX90A-NEXT: s_waitcnt vmcnt(0) 2150; GFX90A-NEXT: s_setpc_b64 s[30:31] 2151; 2152; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_3: 2153; GFX940: ; %bb.0: 2154; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2155; GFX940-NEXT: ;;#ASMSTART 2156; GFX940-NEXT: ; def v[0:1] 2157; GFX940-NEXT: ;;#ASMEND 2158; GFX940-NEXT: s_mov_b32 s2, 0xffff 2159; GFX940-NEXT: v_mov_b32_e32 v4, 0 2160; GFX940-NEXT: ;;#ASMSTART 2161; GFX940-NEXT: ; def v[2:3] 2162; GFX940-NEXT: ;;#ASMEND 2163; GFX940-NEXT: s_nop 0 2164; GFX940-NEXT: v_bfi_b32 v0, s2, v3, v1 2165; GFX940-NEXT: global_store_dword v4, v0, s[0:1] 
sc0 sc1 2166; GFX940-NEXT: s_waitcnt vmcnt(0) 2167; GFX940-NEXT: s_setpc_b64 s[30:31] 2168 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2169 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2170 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 3> 2171 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2172 ret void 2173} 2174 2175define void @v_shuffle_v2bf16_v4bf16__u_4(ptr addrspace(1) inreg %ptr) { 2176; GFX9-LABEL: v_shuffle_v2bf16_v4bf16__u_4: 2177; GFX9: ; %bb.0: 2178; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2179; GFX9-NEXT: s_setpc_b64 s[30:31] 2180 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2181 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 4> 2182 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2183 ret void 2184} 2185 2186define void @v_shuffle_v2bf16_v4bf16__0_4(ptr addrspace(1) inreg %ptr) { 2187; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_4: 2188; GFX900: ; %bb.0: 2189; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2190; GFX900-NEXT: v_mov_b32_e32 v2, 0 2191; GFX900-NEXT: ;;#ASMSTART 2192; GFX900-NEXT: ; def v[0:1] 2193; GFX900-NEXT: ;;#ASMEND 2194; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2195; GFX900-NEXT: s_waitcnt vmcnt(0) 2196; GFX900-NEXT: s_setpc_b64 s[30:31] 2197; 2198; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_4: 2199; GFX90A: ; %bb.0: 2200; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2201; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2202; GFX90A-NEXT: ;;#ASMSTART 2203; GFX90A-NEXT: ; def v[0:1] 2204; GFX90A-NEXT: ;;#ASMEND 2205; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2206; GFX90A-NEXT: s_waitcnt vmcnt(0) 2207; GFX90A-NEXT: s_setpc_b64 s[30:31] 2208; 2209; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_4: 2210; GFX940: ; %bb.0: 2211; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2212; GFX940-NEXT: v_mov_b32_e32 v2, 0 2213; GFX940-NEXT: ;;#ASMSTART 2214; GFX940-NEXT: ; def v[0:1] 2215; GFX940-NEXT: ;;#ASMEND 
2216; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2217; GFX940-NEXT: s_waitcnt vmcnt(0) 2218; GFX940-NEXT: s_setpc_b64 s[30:31] 2219 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2220 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 4> 2221 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2222 ret void 2223} 2224 2225define void @v_shuffle_v2bf16_v4bf16__1_4(ptr addrspace(1) inreg %ptr) { 2226; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_4: 2227; GFX900: ; %bb.0: 2228; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2229; GFX900-NEXT: ;;#ASMSTART 2230; GFX900-NEXT: ; def v[0:1] 2231; GFX900-NEXT: ;;#ASMEND 2232; GFX900-NEXT: v_mov_b32_e32 v2, 0 2233; GFX900-NEXT: v_alignbit_b32 v0, s4, v0, 16 2234; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2235; GFX900-NEXT: s_waitcnt vmcnt(0) 2236; GFX900-NEXT: s_setpc_b64 s[30:31] 2237; 2238; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_4: 2239; GFX90A: ; %bb.0: 2240; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2241; GFX90A-NEXT: ;;#ASMSTART 2242; GFX90A-NEXT: ; def v[0:1] 2243; GFX90A-NEXT: ;;#ASMEND 2244; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2245; GFX90A-NEXT: v_alignbit_b32 v0, s4, v0, 16 2246; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2247; GFX90A-NEXT: s_waitcnt vmcnt(0) 2248; GFX90A-NEXT: s_setpc_b64 s[30:31] 2249; 2250; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_4: 2251; GFX940: ; %bb.0: 2252; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2253; GFX940-NEXT: ;;#ASMSTART 2254; GFX940-NEXT: ; def v[0:1] 2255; GFX940-NEXT: ;;#ASMEND 2256; GFX940-NEXT: v_mov_b32_e32 v2, 0 2257; GFX940-NEXT: v_alignbit_b32 v0, s0, v0, 16 2258; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2259; GFX940-NEXT: s_waitcnt vmcnt(0) 2260; GFX940-NEXT: s_setpc_b64 s[30:31] 2261 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2262 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 4> 2263 store <2 x bfloat> %shuf, ptr addrspace(1) 
%ptr, align 4 2264 ret void 2265} 2266 2267define void @v_shuffle_v2bf16_v4bf16__2_4(ptr addrspace(1) inreg %ptr) { 2268; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_4: 2269; GFX900: ; %bb.0: 2270; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2271; GFX900-NEXT: v_mov_b32_e32 v2, 0 2272; GFX900-NEXT: ;;#ASMSTART 2273; GFX900-NEXT: ; def v[0:1] 2274; GFX900-NEXT: ;;#ASMEND 2275; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 2276; GFX900-NEXT: s_waitcnt vmcnt(0) 2277; GFX900-NEXT: s_setpc_b64 s[30:31] 2278; 2279; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_4: 2280; GFX90A: ; %bb.0: 2281; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2282; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2283; GFX90A-NEXT: ;;#ASMSTART 2284; GFX90A-NEXT: ; def v[0:1] 2285; GFX90A-NEXT: ;;#ASMEND 2286; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 2287; GFX90A-NEXT: s_waitcnt vmcnt(0) 2288; GFX90A-NEXT: s_setpc_b64 s[30:31] 2289; 2290; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_4: 2291; GFX940: ; %bb.0: 2292; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2293; GFX940-NEXT: v_mov_b32_e32 v2, 0 2294; GFX940-NEXT: ;;#ASMSTART 2295; GFX940-NEXT: ; def v[0:1] 2296; GFX940-NEXT: ;;#ASMEND 2297; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 2298; GFX940-NEXT: s_waitcnt vmcnt(0) 2299; GFX940-NEXT: s_setpc_b64 s[30:31] 2300 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2301 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 4> 2302 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2303 ret void 2304} 2305 2306define void @v_shuffle_v2bf16_v4bf16__3_4(ptr addrspace(1) inreg %ptr) { 2307; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_4: 2308; GFX900: ; %bb.0: 2309; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2310; GFX900-NEXT: ;;#ASMSTART 2311; GFX900-NEXT: ; def v[0:1] 2312; GFX900-NEXT: ;;#ASMEND 2313; GFX900-NEXT: v_mov_b32_e32 v2, 0 2314; GFX900-NEXT: v_alignbit_b32 v0, s4, v1, 16 2315; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 
2316; GFX900-NEXT: s_waitcnt vmcnt(0) 2317; GFX900-NEXT: s_setpc_b64 s[30:31] 2318; 2319; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_4: 2320; GFX90A: ; %bb.0: 2321; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2322; GFX90A-NEXT: ;;#ASMSTART 2323; GFX90A-NEXT: ; def v[0:1] 2324; GFX90A-NEXT: ;;#ASMEND 2325; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2326; GFX90A-NEXT: v_alignbit_b32 v0, s4, v1, 16 2327; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2328; GFX90A-NEXT: s_waitcnt vmcnt(0) 2329; GFX90A-NEXT: s_setpc_b64 s[30:31] 2330; 2331; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_4: 2332; GFX940: ; %bb.0: 2333; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2334; GFX940-NEXT: ;;#ASMSTART 2335; GFX940-NEXT: ; def v[0:1] 2336; GFX940-NEXT: ;;#ASMEND 2337; GFX940-NEXT: v_mov_b32_e32 v2, 0 2338; GFX940-NEXT: v_alignbit_b32 v0, s0, v1, 16 2339; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2340; GFX940-NEXT: s_waitcnt vmcnt(0) 2341; GFX940-NEXT: s_setpc_b64 s[30:31] 2342 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2343 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 4> 2344 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2345 ret void 2346} 2347 2348define void @v_shuffle_v2bf16_v4bf16__4_4(ptr addrspace(1) inreg %ptr) { 2349; GFX9-LABEL: v_shuffle_v2bf16_v4bf16__4_4: 2350; GFX9: ; %bb.0: 2351; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2352; GFX9-NEXT: s_setpc_b64 s[30:31] 2353 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2354 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 4> 2355 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2356 ret void 2357} 2358 2359define void @v_shuffle_v2bf16_v4bf16__5_4(ptr addrspace(1) inreg %ptr) { 2360; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_4: 2361; GFX900: ; %bb.0: 2362; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2363; GFX900-NEXT: ;;#ASMSTART 2364; GFX900-NEXT: ; def v[0:1] 2365; GFX900-NEXT: ;;#ASMEND 
2366; GFX900-NEXT: v_mov_b32_e32 v2, 0 2367; GFX900-NEXT: v_alignbit_b32 v0, v0, v0, 16 2368; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2369; GFX900-NEXT: s_waitcnt vmcnt(0) 2370; GFX900-NEXT: s_setpc_b64 s[30:31] 2371; 2372; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_4: 2373; GFX90A: ; %bb.0: 2374; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2375; GFX90A-NEXT: ;;#ASMSTART 2376; GFX90A-NEXT: ; def v[0:1] 2377; GFX90A-NEXT: ;;#ASMEND 2378; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2379; GFX90A-NEXT: v_alignbit_b32 v0, v0, v0, 16 2380; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2381; GFX90A-NEXT: s_waitcnt vmcnt(0) 2382; GFX90A-NEXT: s_setpc_b64 s[30:31] 2383; 2384; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_4: 2385; GFX940: ; %bb.0: 2386; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2387; GFX940-NEXT: ;;#ASMSTART 2388; GFX940-NEXT: ; def v[0:1] 2389; GFX940-NEXT: ;;#ASMEND 2390; GFX940-NEXT: v_mov_b32_e32 v2, 0 2391; GFX940-NEXT: v_alignbit_b32 v0, v0, v0, 16 2392; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2393; GFX940-NEXT: s_waitcnt vmcnt(0) 2394; GFX940-NEXT: s_setpc_b64 s[30:31] 2395 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2396 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2397 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 4> 2398 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2399 ret void 2400} 2401 2402define void @v_shuffle_v2bf16_v4bf16__6_4(ptr addrspace(1) inreg %ptr) { 2403; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_4: 2404; GFX900: ; %bb.0: 2405; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2406; GFX900-NEXT: ;;#ASMSTART 2407; GFX900-NEXT: ; def v[0:1] 2408; GFX900-NEXT: ;;#ASMEND 2409; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2410; GFX900-NEXT: v_mov_b32_e32 v2, 0 2411; GFX900-NEXT: v_perm_b32 v0, v0, v1, s4 2412; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2413; GFX900-NEXT: s_waitcnt vmcnt(0) 2414; GFX900-NEXT: s_setpc_b64 s[30:31] 2415; 2416; 
GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_4: 2417; GFX90A: ; %bb.0: 2418; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2419; GFX90A-NEXT: ;;#ASMSTART 2420; GFX90A-NEXT: ; def v[0:1] 2421; GFX90A-NEXT: ;;#ASMEND 2422; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2423; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2424; GFX90A-NEXT: v_perm_b32 v0, v0, v1, s4 2425; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2426; GFX90A-NEXT: s_waitcnt vmcnt(0) 2427; GFX90A-NEXT: s_setpc_b64 s[30:31] 2428; 2429; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_4: 2430; GFX940: ; %bb.0: 2431; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2432; GFX940-NEXT: ;;#ASMSTART 2433; GFX940-NEXT: ; def v[0:1] 2434; GFX940-NEXT: ;;#ASMEND 2435; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2436; GFX940-NEXT: v_mov_b32_e32 v2, 0 2437; GFX940-NEXT: v_perm_b32 v0, v0, v1, s2 2438; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2439; GFX940-NEXT: s_waitcnt vmcnt(0) 2440; GFX940-NEXT: s_setpc_b64 s[30:31] 2441 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2442 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2443 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 4> 2444 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2445 ret void 2446} 2447 2448define void @v_shuffle_v2bf16_v4bf16__u_5(ptr addrspace(1) inreg %ptr) { 2449; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_5: 2450; GFX900: ; %bb.0: 2451; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2452; GFX900-NEXT: v_mov_b32_e32 v2, 0 2453; GFX900-NEXT: ;;#ASMSTART 2454; GFX900-NEXT: ; def v[0:1] 2455; GFX900-NEXT: ;;#ASMEND 2456; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2457; GFX900-NEXT: s_waitcnt vmcnt(0) 2458; GFX900-NEXT: s_setpc_b64 s[30:31] 2459; 2460; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_5: 2461; GFX90A: ; %bb.0: 2462; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2463; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2464; GFX90A-NEXT: ;;#ASMSTART 2465; GFX90A-NEXT: ; def v[0:1] 2466; GFX90A-NEXT: 
;;#ASMEND 2467; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2468; GFX90A-NEXT: s_waitcnt vmcnt(0) 2469; GFX90A-NEXT: s_setpc_b64 s[30:31] 2470; 2471; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_5: 2472; GFX940: ; %bb.0: 2473; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2474; GFX940-NEXT: v_mov_b32_e32 v2, 0 2475; GFX940-NEXT: ;;#ASMSTART 2476; GFX940-NEXT: ; def v[0:1] 2477; GFX940-NEXT: ;;#ASMEND 2478; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2479; GFX940-NEXT: s_waitcnt vmcnt(0) 2480; GFX940-NEXT: s_setpc_b64 s[30:31] 2481 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2482 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2483 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 5> 2484 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2485 ret void 2486} 2487 2488define void @v_shuffle_v2bf16_v4bf16__0_5(ptr addrspace(1) inreg %ptr) { 2489; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_5: 2490; GFX900: ; %bb.0: 2491; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2492; GFX900-NEXT: ;;#ASMSTART 2493; GFX900-NEXT: ; def v[0:1] 2494; GFX900-NEXT: ;;#ASMEND 2495; GFX900-NEXT: s_mov_b32 s4, 0xffff 2496; GFX900-NEXT: v_mov_b32_e32 v3, 0 2497; GFX900-NEXT: ;;#ASMSTART 2498; GFX900-NEXT: ; def v[1:2] 2499; GFX900-NEXT: ;;#ASMEND 2500; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v1 2501; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 2502; GFX900-NEXT: s_waitcnt vmcnt(0) 2503; GFX900-NEXT: s_setpc_b64 s[30:31] 2504; 2505; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_5: 2506; GFX90A: ; %bb.0: 2507; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2508; GFX90A-NEXT: ;;#ASMSTART 2509; GFX90A-NEXT: ; def v[0:1] 2510; GFX90A-NEXT: ;;#ASMEND 2511; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2512; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2513; GFX90A-NEXT: ;;#ASMSTART 2514; GFX90A-NEXT: ; def v[2:3] 2515; GFX90A-NEXT: ;;#ASMEND 2516; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v2 2517; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2518; 
GFX90A-NEXT: s_waitcnt vmcnt(0) 2519; GFX90A-NEXT: s_setpc_b64 s[30:31] 2520; 2521; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_5: 2522; GFX940: ; %bb.0: 2523; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2524; GFX940-NEXT: ;;#ASMSTART 2525; GFX940-NEXT: ; def v[0:1] 2526; GFX940-NEXT: ;;#ASMEND 2527; GFX940-NEXT: s_mov_b32 s2, 0xffff 2528; GFX940-NEXT: v_mov_b32_e32 v4, 0 2529; GFX940-NEXT: ;;#ASMSTART 2530; GFX940-NEXT: ; def v[2:3] 2531; GFX940-NEXT: ;;#ASMEND 2532; GFX940-NEXT: s_nop 0 2533; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v2 2534; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2535; GFX940-NEXT: s_waitcnt vmcnt(0) 2536; GFX940-NEXT: s_setpc_b64 s[30:31] 2537 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2538 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2539 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 5> 2540 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2541 ret void 2542} 2543 2544define void @v_shuffle_v2bf16_v4bf16__1_5(ptr addrspace(1) inreg %ptr) { 2545; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_5: 2546; GFX900: ; %bb.0: 2547; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2548; GFX900-NEXT: ;;#ASMSTART 2549; GFX900-NEXT: ; def v[0:1] 2550; GFX900-NEXT: ;;#ASMEND 2551; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2552; GFX900-NEXT: v_mov_b32_e32 v3, 0 2553; GFX900-NEXT: ;;#ASMSTART 2554; GFX900-NEXT: ; def v[1:2] 2555; GFX900-NEXT: ;;#ASMEND 2556; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 2557; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 2558; GFX900-NEXT: s_waitcnt vmcnt(0) 2559; GFX900-NEXT: s_setpc_b64 s[30:31] 2560; 2561; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_5: 2562; GFX90A: ; %bb.0: 2563; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2564; GFX90A-NEXT: ;;#ASMSTART 2565; GFX90A-NEXT: ; def v[0:1] 2566; GFX90A-NEXT: ;;#ASMEND 2567; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2568; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2569; GFX90A-NEXT: ;;#ASMSTART 2570; GFX90A-NEXT: ; def v[2:3] 
2571; GFX90A-NEXT: ;;#ASMEND 2572; GFX90A-NEXT: v_perm_b32 v0, v2, v0, s4 2573; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2574; GFX90A-NEXT: s_waitcnt vmcnt(0) 2575; GFX90A-NEXT: s_setpc_b64 s[30:31] 2576; 2577; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_5: 2578; GFX940: ; %bb.0: 2579; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2580; GFX940-NEXT: ;;#ASMSTART 2581; GFX940-NEXT: ; def v[0:1] 2582; GFX940-NEXT: ;;#ASMEND 2583; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2584; GFX940-NEXT: v_mov_b32_e32 v4, 0 2585; GFX940-NEXT: ;;#ASMSTART 2586; GFX940-NEXT: ; def v[2:3] 2587; GFX940-NEXT: ;;#ASMEND 2588; GFX940-NEXT: s_nop 0 2589; GFX940-NEXT: v_perm_b32 v0, v2, v0, s2 2590; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2591; GFX940-NEXT: s_waitcnt vmcnt(0) 2592; GFX940-NEXT: s_setpc_b64 s[30:31] 2593 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2594 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2595 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 5> 2596 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2597 ret void 2598} 2599 2600define void @v_shuffle_v2bf16_v4bf16__2_5(ptr addrspace(1) inreg %ptr) { 2601; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_5: 2602; GFX900: ; %bb.0: 2603; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2604; GFX900-NEXT: ;;#ASMSTART 2605; GFX900-NEXT: ; def v[0:1] 2606; GFX900-NEXT: ;;#ASMEND 2607; GFX900-NEXT: s_mov_b32 s4, 0xffff 2608; GFX900-NEXT: v_mov_b32_e32 v4, 0 2609; GFX900-NEXT: ;;#ASMSTART 2610; GFX900-NEXT: ; def v[2:3] 2611; GFX900-NEXT: ;;#ASMEND 2612; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v2 2613; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2614; GFX900-NEXT: s_waitcnt vmcnt(0) 2615; GFX900-NEXT: s_setpc_b64 s[30:31] 2616; 2617; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_5: 2618; GFX90A: ; %bb.0: 2619; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2620; GFX90A-NEXT: ;;#ASMSTART 2621; GFX90A-NEXT: ; def v[0:1] 2622; GFX90A-NEXT: ;;#ASMEND 2623; 
GFX90A-NEXT: s_mov_b32 s4, 0xffff 2624; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2625; GFX90A-NEXT: ;;#ASMSTART 2626; GFX90A-NEXT: ; def v[2:3] 2627; GFX90A-NEXT: ;;#ASMEND 2628; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v2 2629; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2630; GFX90A-NEXT: s_waitcnt vmcnt(0) 2631; GFX90A-NEXT: s_setpc_b64 s[30:31] 2632; 2633; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_5: 2634; GFX940: ; %bb.0: 2635; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2636; GFX940-NEXT: ;;#ASMSTART 2637; GFX940-NEXT: ; def v[0:1] 2638; GFX940-NEXT: ;;#ASMEND 2639; GFX940-NEXT: s_mov_b32 s2, 0xffff 2640; GFX940-NEXT: v_mov_b32_e32 v4, 0 2641; GFX940-NEXT: ;;#ASMSTART 2642; GFX940-NEXT: ; def v[2:3] 2643; GFX940-NEXT: ;;#ASMEND 2644; GFX940-NEXT: s_nop 0 2645; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v2 2646; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2647; GFX940-NEXT: s_waitcnt vmcnt(0) 2648; GFX940-NEXT: s_setpc_b64 s[30:31] 2649 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2650 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2651 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 5> 2652 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2653 ret void 2654} 2655 2656define void @v_shuffle_v2bf16_v4bf16__3_5(ptr addrspace(1) inreg %ptr) { 2657; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_5: 2658; GFX900: ; %bb.0: 2659; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2660; GFX900-NEXT: ;;#ASMSTART 2661; GFX900-NEXT: ; def v[0:1] 2662; GFX900-NEXT: ;;#ASMEND 2663; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2664; GFX900-NEXT: v_mov_b32_e32 v4, 0 2665; GFX900-NEXT: ;;#ASMSTART 2666; GFX900-NEXT: ; def v[2:3] 2667; GFX900-NEXT: ;;#ASMEND 2668; GFX900-NEXT: v_perm_b32 v0, v2, v1, s4 2669; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 2670; GFX900-NEXT: s_waitcnt vmcnt(0) 2671; GFX900-NEXT: s_setpc_b64 s[30:31] 2672; 2673; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_5: 2674; GFX90A: ; %bb.0: 2675; GFX90A-NEXT: 
s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2676; GFX90A-NEXT: ;;#ASMSTART 2677; GFX90A-NEXT: ; def v[0:1] 2678; GFX90A-NEXT: ;;#ASMEND 2679; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2680; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2681; GFX90A-NEXT: ;;#ASMSTART 2682; GFX90A-NEXT: ; def v[2:3] 2683; GFX90A-NEXT: ;;#ASMEND 2684; GFX90A-NEXT: v_perm_b32 v0, v2, v1, s4 2685; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2686; GFX90A-NEXT: s_waitcnt vmcnt(0) 2687; GFX90A-NEXT: s_setpc_b64 s[30:31] 2688; 2689; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_5: 2690; GFX940: ; %bb.0: 2691; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2692; GFX940-NEXT: ;;#ASMSTART 2693; GFX940-NEXT: ; def v[0:1] 2694; GFX940-NEXT: ;;#ASMEND 2695; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2696; GFX940-NEXT: v_mov_b32_e32 v4, 0 2697; GFX940-NEXT: ;;#ASMSTART 2698; GFX940-NEXT: ; def v[2:3] 2699; GFX940-NEXT: ;;#ASMEND 2700; GFX940-NEXT: s_nop 0 2701; GFX940-NEXT: v_perm_b32 v0, v2, v1, s2 2702; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2703; GFX940-NEXT: s_waitcnt vmcnt(0) 2704; GFX940-NEXT: s_setpc_b64 s[30:31] 2705 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2706 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2707 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 5> 2708 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2709 ret void 2710} 2711 2712define void @v_shuffle_v2bf16_v4bf16__4_5(ptr addrspace(1) inreg %ptr) { 2713; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_5: 2714; GFX900: ; %bb.0: 2715; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2716; GFX900-NEXT: v_mov_b32_e32 v2, 0 2717; GFX900-NEXT: ;;#ASMSTART 2718; GFX900-NEXT: ; def v[0:1] 2719; GFX900-NEXT: ;;#ASMEND 2720; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2721; GFX900-NEXT: s_waitcnt vmcnt(0) 2722; GFX900-NEXT: s_setpc_b64 s[30:31] 2723; 2724; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_5: 2725; GFX90A: ; %bb.0: 2726; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
2727; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2728; GFX90A-NEXT: ;;#ASMSTART 2729; GFX90A-NEXT: ; def v[0:1] 2730; GFX90A-NEXT: ;;#ASMEND 2731; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2732; GFX90A-NEXT: s_waitcnt vmcnt(0) 2733; GFX90A-NEXT: s_setpc_b64 s[30:31] 2734; 2735; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_5: 2736; GFX940: ; %bb.0: 2737; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2738; GFX940-NEXT: v_mov_b32_e32 v2, 0 2739; GFX940-NEXT: ;;#ASMSTART 2740; GFX940-NEXT: ; def v[0:1] 2741; GFX940-NEXT: ;;#ASMEND 2742; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2743; GFX940-NEXT: s_waitcnt vmcnt(0) 2744; GFX940-NEXT: s_setpc_b64 s[30:31] 2745 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2746 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2747 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 5> 2748 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2749 ret void 2750} 2751 2752define void @v_shuffle_v2bf16_v4bf16__5_5(ptr addrspace(1) inreg %ptr) { 2753; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_5: 2754; GFX900: ; %bb.0: 2755; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2756; GFX900-NEXT: ;;#ASMSTART 2757; GFX900-NEXT: ; def v[0:1] 2758; GFX900-NEXT: ;;#ASMEND 2759; GFX900-NEXT: s_mov_b32 s4, 0x7060302 2760; GFX900-NEXT: v_mov_b32_e32 v2, 0 2761; GFX900-NEXT: v_perm_b32 v0, v0, v0, s4 2762; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2763; GFX900-NEXT: s_waitcnt vmcnt(0) 2764; GFX900-NEXT: s_setpc_b64 s[30:31] 2765; 2766; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_5: 2767; GFX90A: ; %bb.0: 2768; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2769; GFX90A-NEXT: ;;#ASMSTART 2770; GFX90A-NEXT: ; def v[0:1] 2771; GFX90A-NEXT: ;;#ASMEND 2772; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 2773; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2774; GFX90A-NEXT: v_perm_b32 v0, v0, v0, s4 2775; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2776; GFX90A-NEXT: s_waitcnt vmcnt(0) 2777; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 2778; 2779; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_5: 2780; GFX940: ; %bb.0: 2781; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2782; GFX940-NEXT: ;;#ASMSTART 2783; GFX940-NEXT: ; def v[0:1] 2784; GFX940-NEXT: ;;#ASMEND 2785; GFX940-NEXT: s_mov_b32 s2, 0x7060302 2786; GFX940-NEXT: v_mov_b32_e32 v2, 0 2787; GFX940-NEXT: v_perm_b32 v0, v0, v0, s2 2788; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2789; GFX940-NEXT: s_waitcnt vmcnt(0) 2790; GFX940-NEXT: s_setpc_b64 s[30:31] 2791 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2792 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2793 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 5> 2794 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2795 ret void 2796} 2797 2798define void @v_shuffle_v2bf16_v4bf16__6_5(ptr addrspace(1) inreg %ptr) { 2799; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_5: 2800; GFX900: ; %bb.0: 2801; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2802; GFX900-NEXT: ;;#ASMSTART 2803; GFX900-NEXT: ; def v[0:1] 2804; GFX900-NEXT: ;;#ASMEND 2805; GFX900-NEXT: s_mov_b32 s4, 0xffff 2806; GFX900-NEXT: v_mov_b32_e32 v2, 0 2807; GFX900-NEXT: v_bfi_b32 v0, s4, v1, v0 2808; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2809; GFX900-NEXT: s_waitcnt vmcnt(0) 2810; GFX900-NEXT: s_setpc_b64 s[30:31] 2811; 2812; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_5: 2813; GFX90A: ; %bb.0: 2814; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2815; GFX90A-NEXT: ;;#ASMSTART 2816; GFX90A-NEXT: ; def v[0:1] 2817; GFX90A-NEXT: ;;#ASMEND 2818; GFX90A-NEXT: s_mov_b32 s4, 0xffff 2819; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2820; GFX90A-NEXT: v_bfi_b32 v0, s4, v1, v0 2821; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2822; GFX90A-NEXT: s_waitcnt vmcnt(0) 2823; GFX90A-NEXT: s_setpc_b64 s[30:31] 2824; 2825; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_5: 2826; GFX940: ; %bb.0: 2827; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2828; 
GFX940-NEXT: ;;#ASMSTART 2829; GFX940-NEXT: ; def v[0:1] 2830; GFX940-NEXT: ;;#ASMEND 2831; GFX940-NEXT: s_mov_b32 s2, 0xffff 2832; GFX940-NEXT: v_mov_b32_e32 v2, 0 2833; GFX940-NEXT: v_bfi_b32 v0, s2, v1, v0 2834; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2835; GFX940-NEXT: s_waitcnt vmcnt(0) 2836; GFX940-NEXT: s_setpc_b64 s[30:31] 2837 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2838 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2839 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 5> 2840 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2841 ret void 2842} 2843 2844define void @v_shuffle_v2bf16_v4bf16__u_6(ptr addrspace(1) inreg %ptr) { 2845; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_6: 2846; GFX900: ; %bb.0: 2847; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2848; GFX900-NEXT: ;;#ASMSTART 2849; GFX900-NEXT: ; def v[0:1] 2850; GFX900-NEXT: ;;#ASMEND 2851; GFX900-NEXT: v_mov_b32_e32 v2, 0 2852; GFX900-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2853; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 2854; GFX900-NEXT: s_waitcnt vmcnt(0) 2855; GFX900-NEXT: s_setpc_b64 s[30:31] 2856; 2857; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_6: 2858; GFX90A: ; %bb.0: 2859; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2860; GFX90A-NEXT: ;;#ASMSTART 2861; GFX90A-NEXT: ; def v[0:1] 2862; GFX90A-NEXT: ;;#ASMEND 2863; GFX90A-NEXT: v_mov_b32_e32 v2, 0 2864; GFX90A-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2865; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 2866; GFX90A-NEXT: s_waitcnt vmcnt(0) 2867; GFX90A-NEXT: s_setpc_b64 s[30:31] 2868; 2869; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_6: 2870; GFX940: ; %bb.0: 2871; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2872; GFX940-NEXT: ;;#ASMSTART 2873; GFX940-NEXT: ; def v[0:1] 2874; GFX940-NEXT: ;;#ASMEND 2875; GFX940-NEXT: v_mov_b32_e32 v2, 0 2876; GFX940-NEXT: v_lshlrev_b32_e32 v0, 16, v1 2877; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 2878; 
GFX940-NEXT: s_waitcnt vmcnt(0) 2879; GFX940-NEXT: s_setpc_b64 s[30:31] 2880 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2881 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2882 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 6> 2883 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2884 ret void 2885} 2886 2887define void @v_shuffle_v2bf16_v4bf16__0_6(ptr addrspace(1) inreg %ptr) { 2888; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_6: 2889; GFX900: ; %bb.0: 2890; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2891; GFX900-NEXT: ;;#ASMSTART 2892; GFX900-NEXT: ; def v[0:1] 2893; GFX900-NEXT: ;;#ASMEND 2894; GFX900-NEXT: s_mov_b32 s4, 0x5040100 2895; GFX900-NEXT: v_mov_b32_e32 v3, 0 2896; GFX900-NEXT: ;;#ASMSTART 2897; GFX900-NEXT: ; def v[1:2] 2898; GFX900-NEXT: ;;#ASMEND 2899; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 2900; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 2901; GFX900-NEXT: s_waitcnt vmcnt(0) 2902; GFX900-NEXT: s_setpc_b64 s[30:31] 2903; 2904; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_6: 2905; GFX90A: ; %bb.0: 2906; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2907; GFX90A-NEXT: ;;#ASMSTART 2908; GFX90A-NEXT: ; def v[0:1] 2909; GFX90A-NEXT: ;;#ASMEND 2910; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 2911; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2912; GFX90A-NEXT: ;;#ASMSTART 2913; GFX90A-NEXT: ; def v[2:3] 2914; GFX90A-NEXT: ;;#ASMEND 2915; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 2916; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2917; GFX90A-NEXT: s_waitcnt vmcnt(0) 2918; GFX90A-NEXT: s_setpc_b64 s[30:31] 2919; 2920; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_6: 2921; GFX940: ; %bb.0: 2922; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2923; GFX940-NEXT: ;;#ASMSTART 2924; GFX940-NEXT: ; def v[0:1] 2925; GFX940-NEXT: ;;#ASMEND 2926; GFX940-NEXT: s_mov_b32 s2, 0x5040100 2927; GFX940-NEXT: v_mov_b32_e32 v4, 0 2928; GFX940-NEXT: ;;#ASMSTART 2929; GFX940-NEXT: ; def v[2:3] 2930; GFX940-NEXT: 
;;#ASMEND 2931; GFX940-NEXT: s_nop 0 2932; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 2933; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2934; GFX940-NEXT: s_waitcnt vmcnt(0) 2935; GFX940-NEXT: s_setpc_b64 s[30:31] 2936 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2937 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2938 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 6> 2939 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2940 ret void 2941} 2942 2943define void @v_shuffle_v2bf16_v4bf16__1_6(ptr addrspace(1) inreg %ptr) { 2944; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_6: 2945; GFX900: ; %bb.0: 2946; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2947; GFX900-NEXT: ;;#ASMSTART 2948; GFX900-NEXT: ; def v[0:1] 2949; GFX900-NEXT: ;;#ASMEND 2950; GFX900-NEXT: v_mov_b32_e32 v3, 0 2951; GFX900-NEXT: ;;#ASMSTART 2952; GFX900-NEXT: ; def v[1:2] 2953; GFX900-NEXT: ;;#ASMEND 2954; GFX900-NEXT: v_alignbit_b32 v0, v2, v0, 16 2955; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 2956; GFX900-NEXT: s_waitcnt vmcnt(0) 2957; GFX900-NEXT: s_setpc_b64 s[30:31] 2958; 2959; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_6: 2960; GFX90A: ; %bb.0: 2961; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2962; GFX90A-NEXT: ;;#ASMSTART 2963; GFX90A-NEXT: ; def v[0:1] 2964; GFX90A-NEXT: ;;#ASMEND 2965; GFX90A-NEXT: v_mov_b32_e32 v4, 0 2966; GFX90A-NEXT: ;;#ASMSTART 2967; GFX90A-NEXT: ; def v[2:3] 2968; GFX90A-NEXT: ;;#ASMEND 2969; GFX90A-NEXT: v_alignbit_b32 v0, v3, v0, 16 2970; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 2971; GFX90A-NEXT: s_waitcnt vmcnt(0) 2972; GFX90A-NEXT: s_setpc_b64 s[30:31] 2973; 2974; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_6: 2975; GFX940: ; %bb.0: 2976; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 2977; GFX940-NEXT: ;;#ASMSTART 2978; GFX940-NEXT: ; def v[0:1] 2979; GFX940-NEXT: ;;#ASMEND 2980; GFX940-NEXT: v_mov_b32_e32 v4, 0 2981; GFX940-NEXT: ;;#ASMSTART 2982; GFX940-NEXT: ; def v[2:3] 
2983; GFX940-NEXT: ;;#ASMEND 2984; GFX940-NEXT: s_nop 0 2985; GFX940-NEXT: v_alignbit_b32 v0, v3, v0, 16 2986; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 2987; GFX940-NEXT: s_waitcnt vmcnt(0) 2988; GFX940-NEXT: s_setpc_b64 s[30:31] 2989 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 2990 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 2991 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 6> 2992 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 2993 ret void 2994} 2995 2996define void @v_shuffle_v2bf16_v4bf16__2_6(ptr addrspace(1) inreg %ptr) { 2997; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_6: 2998; GFX900: ; %bb.0: 2999; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3000; GFX900-NEXT: ;;#ASMSTART 3001; GFX900-NEXT: ; def v[0:1] 3002; GFX900-NEXT: ;;#ASMEND 3003; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3004; GFX900-NEXT: v_mov_b32_e32 v4, 0 3005; GFX900-NEXT: ;;#ASMSTART 3006; GFX900-NEXT: ; def v[2:3] 3007; GFX900-NEXT: ;;#ASMEND 3008; GFX900-NEXT: v_perm_b32 v0, v3, v1, s4 3009; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3010; GFX900-NEXT: s_waitcnt vmcnt(0) 3011; GFX900-NEXT: s_setpc_b64 s[30:31] 3012; 3013; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_6: 3014; GFX90A: ; %bb.0: 3015; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3016; GFX90A-NEXT: ;;#ASMSTART 3017; GFX90A-NEXT: ; def v[0:1] 3018; GFX90A-NEXT: ;;#ASMEND 3019; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3020; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3021; GFX90A-NEXT: ;;#ASMSTART 3022; GFX90A-NEXT: ; def v[2:3] 3023; GFX90A-NEXT: ;;#ASMEND 3024; GFX90A-NEXT: v_perm_b32 v0, v3, v1, s4 3025; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3026; GFX90A-NEXT: s_waitcnt vmcnt(0) 3027; GFX90A-NEXT: s_setpc_b64 s[30:31] 3028; 3029; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_6: 3030; GFX940: ; %bb.0: 3031; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3032; GFX940-NEXT: ;;#ASMSTART 3033; GFX940-NEXT: ; def v[0:1] 3034; GFX940-NEXT: ;;#ASMEND 
3035; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3036; GFX940-NEXT: v_mov_b32_e32 v4, 0 3037; GFX940-NEXT: ;;#ASMSTART 3038; GFX940-NEXT: ; def v[2:3] 3039; GFX940-NEXT: ;;#ASMEND 3040; GFX940-NEXT: s_nop 0 3041; GFX940-NEXT: v_perm_b32 v0, v3, v1, s2 3042; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3043; GFX940-NEXT: s_waitcnt vmcnt(0) 3044; GFX940-NEXT: s_setpc_b64 s[30:31] 3045 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3046 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3047 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 6> 3048 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3049 ret void 3050} 3051 3052define void @v_shuffle_v2bf16_v4bf16__3_6(ptr addrspace(1) inreg %ptr) { 3053; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_6: 3054; GFX900: ; %bb.0: 3055; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3056; GFX900-NEXT: ;;#ASMSTART 3057; GFX900-NEXT: ; def v[0:1] 3058; GFX900-NEXT: ;;#ASMEND 3059; GFX900-NEXT: v_mov_b32_e32 v4, 0 3060; GFX900-NEXT: ;;#ASMSTART 3061; GFX900-NEXT: ; def v[2:3] 3062; GFX900-NEXT: ;;#ASMEND 3063; GFX900-NEXT: v_alignbit_b32 v0, v3, v1, 16 3064; GFX900-NEXT: global_store_dword v4, v0, s[16:17] 3065; GFX900-NEXT: s_waitcnt vmcnt(0) 3066; GFX900-NEXT: s_setpc_b64 s[30:31] 3067; 3068; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_6: 3069; GFX90A: ; %bb.0: 3070; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3071; GFX90A-NEXT: ;;#ASMSTART 3072; GFX90A-NEXT: ; def v[0:1] 3073; GFX90A-NEXT: ;;#ASMEND 3074; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3075; GFX90A-NEXT: ;;#ASMSTART 3076; GFX90A-NEXT: ; def v[2:3] 3077; GFX90A-NEXT: ;;#ASMEND 3078; GFX90A-NEXT: v_alignbit_b32 v0, v3, v1, 16 3079; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3080; GFX90A-NEXT: s_waitcnt vmcnt(0) 3081; GFX90A-NEXT: s_setpc_b64 s[30:31] 3082; 3083; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_6: 3084; GFX940: ; %bb.0: 3085; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3086; GFX940-NEXT: ;;#ASMSTART 
3087; GFX940-NEXT: ; def v[0:1] 3088; GFX940-NEXT: ;;#ASMEND 3089; GFX940-NEXT: v_mov_b32_e32 v4, 0 3090; GFX940-NEXT: ;;#ASMSTART 3091; GFX940-NEXT: ; def v[2:3] 3092; GFX940-NEXT: ;;#ASMEND 3093; GFX940-NEXT: s_nop 0 3094; GFX940-NEXT: v_alignbit_b32 v0, v3, v1, 16 3095; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3096; GFX940-NEXT: s_waitcnt vmcnt(0) 3097; GFX940-NEXT: s_setpc_b64 s[30:31] 3098 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3099 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3100 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 6> 3101 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3102 ret void 3103} 3104 3105define void @v_shuffle_v2bf16_v4bf16__4_6(ptr addrspace(1) inreg %ptr) { 3106; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_6: 3107; GFX900: ; %bb.0: 3108; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3109; GFX900-NEXT: ;;#ASMSTART 3110; GFX900-NEXT: ; def v[0:1] 3111; GFX900-NEXT: ;;#ASMEND 3112; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3113; GFX900-NEXT: v_mov_b32_e32 v2, 0 3114; GFX900-NEXT: v_perm_b32 v0, v1, v0, s4 3115; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3116; GFX900-NEXT: s_waitcnt vmcnt(0) 3117; GFX900-NEXT: s_setpc_b64 s[30:31] 3118; 3119; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_6: 3120; GFX90A: ; %bb.0: 3121; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3122; GFX90A-NEXT: ;;#ASMSTART 3123; GFX90A-NEXT: ; def v[0:1] 3124; GFX90A-NEXT: ;;#ASMEND 3125; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3126; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3127; GFX90A-NEXT: v_perm_b32 v0, v1, v0, s4 3128; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3129; GFX90A-NEXT: s_waitcnt vmcnt(0) 3130; GFX90A-NEXT: s_setpc_b64 s[30:31] 3131; 3132; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_6: 3133; GFX940: ; %bb.0: 3134; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3135; GFX940-NEXT: ;;#ASMSTART 3136; GFX940-NEXT: ; def v[0:1] 3137; GFX940-NEXT: ;;#ASMEND 3138; GFX940-NEXT: 
s_mov_b32 s2, 0x5040100 3139; GFX940-NEXT: v_mov_b32_e32 v2, 0 3140; GFX940-NEXT: v_perm_b32 v0, v1, v0, s2 3141; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3142; GFX940-NEXT: s_waitcnt vmcnt(0) 3143; GFX940-NEXT: s_setpc_b64 s[30:31] 3144 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3145 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3146 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 6> 3147 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3148 ret void 3149} 3150 3151define void @v_shuffle_v2bf16_v4bf16__5_6(ptr addrspace(1) inreg %ptr) { 3152; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_6: 3153; GFX900: ; %bb.0: 3154; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3155; GFX900-NEXT: ;;#ASMSTART 3156; GFX900-NEXT: ; def v[0:1] 3157; GFX900-NEXT: ;;#ASMEND 3158; GFX900-NEXT: v_mov_b32_e32 v2, 0 3159; GFX900-NEXT: v_alignbit_b32 v0, v1, v0, 16 3160; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3161; GFX900-NEXT: s_waitcnt vmcnt(0) 3162; GFX900-NEXT: s_setpc_b64 s[30:31] 3163; 3164; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_6: 3165; GFX90A: ; %bb.0: 3166; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3167; GFX90A-NEXT: ;;#ASMSTART 3168; GFX90A-NEXT: ; def v[0:1] 3169; GFX90A-NEXT: ;;#ASMEND 3170; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3171; GFX90A-NEXT: v_alignbit_b32 v0, v1, v0, 16 3172; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3173; GFX90A-NEXT: s_waitcnt vmcnt(0) 3174; GFX90A-NEXT: s_setpc_b64 s[30:31] 3175; 3176; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_6: 3177; GFX940: ; %bb.0: 3178; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3179; GFX940-NEXT: ;;#ASMSTART 3180; GFX940-NEXT: ; def v[0:1] 3181; GFX940-NEXT: ;;#ASMEND 3182; GFX940-NEXT: v_mov_b32_e32 v2, 0 3183; GFX940-NEXT: v_alignbit_b32 v0, v1, v0, 16 3184; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3185; GFX940-NEXT: s_waitcnt vmcnt(0) 3186; GFX940-NEXT: s_setpc_b64 s[30:31] 3187 %vec0 = call <4 x bfloat> asm 
"; def $0", "=v"() 3188 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3189 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 6> 3190 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3191 ret void 3192} 3193 3194define void @v_shuffle_v2bf16_v4bf16__6_6(ptr addrspace(1) inreg %ptr) { 3195; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_6: 3196; GFX900: ; %bb.0: 3197; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3198; GFX900-NEXT: ;;#ASMSTART 3199; GFX900-NEXT: ; def v[0:1] 3200; GFX900-NEXT: ;;#ASMEND 3201; GFX900-NEXT: s_mov_b32 s4, 0x5040100 3202; GFX900-NEXT: v_mov_b32_e32 v2, 0 3203; GFX900-NEXT: v_perm_b32 v0, v1, v1, s4 3204; GFX900-NEXT: global_store_dword v2, v0, s[16:17] 3205; GFX900-NEXT: s_waitcnt vmcnt(0) 3206; GFX900-NEXT: s_setpc_b64 s[30:31] 3207; 3208; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_6: 3209; GFX90A: ; %bb.0: 3210; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3211; GFX90A-NEXT: ;;#ASMSTART 3212; GFX90A-NEXT: ; def v[0:1] 3213; GFX90A-NEXT: ;;#ASMEND 3214; GFX90A-NEXT: s_mov_b32 s4, 0x5040100 3215; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3216; GFX90A-NEXT: v_perm_b32 v0, v1, v1, s4 3217; GFX90A-NEXT: global_store_dword v2, v0, s[16:17] 3218; GFX90A-NEXT: s_waitcnt vmcnt(0) 3219; GFX90A-NEXT: s_setpc_b64 s[30:31] 3220; 3221; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_6: 3222; GFX940: ; %bb.0: 3223; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3224; GFX940-NEXT: ;;#ASMSTART 3225; GFX940-NEXT: ; def v[0:1] 3226; GFX940-NEXT: ;;#ASMEND 3227; GFX940-NEXT: s_mov_b32 s2, 0x5040100 3228; GFX940-NEXT: v_mov_b32_e32 v2, 0 3229; GFX940-NEXT: v_perm_b32 v0, v1, v1, s2 3230; GFX940-NEXT: global_store_dword v2, v0, s[0:1] sc0 sc1 3231; GFX940-NEXT: s_waitcnt vmcnt(0) 3232; GFX940-NEXT: s_setpc_b64 s[30:31] 3233 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3234 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3235 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 6> 
3236 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3237 ret void 3238} 3239 3240define void @v_shuffle_v2bf16_v4bf16__u_7(ptr addrspace(1) inreg %ptr) { 3241; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__u_7: 3242; GFX900: ; %bb.0: 3243; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3244; GFX900-NEXT: v_mov_b32_e32 v2, 0 3245; GFX900-NEXT: ;;#ASMSTART 3246; GFX900-NEXT: ; def v[0:1] 3247; GFX900-NEXT: ;;#ASMEND 3248; GFX900-NEXT: global_store_dword v2, v1, s[16:17] 3249; GFX900-NEXT: s_waitcnt vmcnt(0) 3250; GFX900-NEXT: s_setpc_b64 s[30:31] 3251; 3252; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__u_7: 3253; GFX90A: ; %bb.0: 3254; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3255; GFX90A-NEXT: v_mov_b32_e32 v2, 0 3256; GFX90A-NEXT: ;;#ASMSTART 3257; GFX90A-NEXT: ; def v[0:1] 3258; GFX90A-NEXT: ;;#ASMEND 3259; GFX90A-NEXT: global_store_dword v2, v1, s[16:17] 3260; GFX90A-NEXT: s_waitcnt vmcnt(0) 3261; GFX90A-NEXT: s_setpc_b64 s[30:31] 3262; 3263; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__u_7: 3264; GFX940: ; %bb.0: 3265; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3266; GFX940-NEXT: v_mov_b32_e32 v2, 0 3267; GFX940-NEXT: ;;#ASMSTART 3268; GFX940-NEXT: ; def v[0:1] 3269; GFX940-NEXT: ;;#ASMEND 3270; GFX940-NEXT: global_store_dword v2, v1, s[0:1] sc0 sc1 3271; GFX940-NEXT: s_waitcnt vmcnt(0) 3272; GFX940-NEXT: s_setpc_b64 s[30:31] 3273 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3274 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3275 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 7> 3276 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3277 ret void 3278} 3279 3280define void @v_shuffle_v2bf16_v4bf16__0_7(ptr addrspace(1) inreg %ptr) { 3281; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__0_7: 3282; GFX900: ; %bb.0: 3283; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3284; GFX900-NEXT: ;;#ASMSTART 3285; GFX900-NEXT: ; def v[0:1] 3286; GFX900-NEXT: ;;#ASMEND 3287; GFX900-NEXT: s_mov_b32 s4, 
0xffff 3288; GFX900-NEXT: v_mov_b32_e32 v3, 0 3289; GFX900-NEXT: ;;#ASMSTART 3290; GFX900-NEXT: ; def v[1:2] 3291; GFX900-NEXT: ;;#ASMEND 3292; GFX900-NEXT: v_bfi_b32 v0, s4, v0, v2 3293; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3294; GFX900-NEXT: s_waitcnt vmcnt(0) 3295; GFX900-NEXT: s_setpc_b64 s[30:31] 3296; 3297; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__0_7: 3298; GFX90A: ; %bb.0: 3299; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3300; GFX90A-NEXT: ;;#ASMSTART 3301; GFX90A-NEXT: ; def v[0:1] 3302; GFX90A-NEXT: ;;#ASMEND 3303; GFX90A-NEXT: s_mov_b32 s4, 0xffff 3304; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3305; GFX90A-NEXT: ;;#ASMSTART 3306; GFX90A-NEXT: ; def v[2:3] 3307; GFX90A-NEXT: ;;#ASMEND 3308; GFX90A-NEXT: v_bfi_b32 v0, s4, v0, v3 3309; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3310; GFX90A-NEXT: s_waitcnt vmcnt(0) 3311; GFX90A-NEXT: s_setpc_b64 s[30:31] 3312; 3313; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__0_7: 3314; GFX940: ; %bb.0: 3315; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3316; GFX940-NEXT: ;;#ASMSTART 3317; GFX940-NEXT: ; def v[0:1] 3318; GFX940-NEXT: ;;#ASMEND 3319; GFX940-NEXT: s_mov_b32 s2, 0xffff 3320; GFX940-NEXT: v_mov_b32_e32 v4, 0 3321; GFX940-NEXT: ;;#ASMSTART 3322; GFX940-NEXT: ; def v[2:3] 3323; GFX940-NEXT: ;;#ASMEND 3324; GFX940-NEXT: s_nop 0 3325; GFX940-NEXT: v_bfi_b32 v0, s2, v0, v3 3326; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3327; GFX940-NEXT: s_waitcnt vmcnt(0) 3328; GFX940-NEXT: s_setpc_b64 s[30:31] 3329 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3330 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3331 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 7> 3332 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3333 ret void 3334} 3335 3336define void @v_shuffle_v2bf16_v4bf16__1_7(ptr addrspace(1) inreg %ptr) { 3337; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__1_7: 3338; GFX900: ; %bb.0: 3339; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
3340; GFX900-NEXT: ;;#ASMSTART 3341; GFX900-NEXT: ; def v[0:1] 3342; GFX900-NEXT: ;;#ASMEND 3343; GFX900-NEXT: s_mov_b32 s4, 0x7060302 3344; GFX900-NEXT: v_mov_b32_e32 v3, 0 3345; GFX900-NEXT: ;;#ASMSTART 3346; GFX900-NEXT: ; def v[1:2] 3347; GFX900-NEXT: ;;#ASMEND 3348; GFX900-NEXT: v_perm_b32 v0, v2, v0, s4 3349; GFX900-NEXT: global_store_dword v3, v0, s[16:17] 3350; GFX900-NEXT: s_waitcnt vmcnt(0) 3351; GFX900-NEXT: s_setpc_b64 s[30:31] 3352; 3353; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__1_7: 3354; GFX90A: ; %bb.0: 3355; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3356; GFX90A-NEXT: ;;#ASMSTART 3357; GFX90A-NEXT: ; def v[0:1] 3358; GFX90A-NEXT: ;;#ASMEND 3359; GFX90A-NEXT: s_mov_b32 s4, 0x7060302 3360; GFX90A-NEXT: v_mov_b32_e32 v4, 0 3361; GFX90A-NEXT: ;;#ASMSTART 3362; GFX90A-NEXT: ; def v[2:3] 3363; GFX90A-NEXT: ;;#ASMEND 3364; GFX90A-NEXT: v_perm_b32 v0, v3, v0, s4 3365; GFX90A-NEXT: global_store_dword v4, v0, s[16:17] 3366; GFX90A-NEXT: s_waitcnt vmcnt(0) 3367; GFX90A-NEXT: s_setpc_b64 s[30:31] 3368; 3369; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__1_7: 3370; GFX940: ; %bb.0: 3371; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 3372; GFX940-NEXT: ;;#ASMSTART 3373; GFX940-NEXT: ; def v[0:1] 3374; GFX940-NEXT: ;;#ASMEND 3375; GFX940-NEXT: s_mov_b32 s2, 0x7060302 3376; GFX940-NEXT: v_mov_b32_e32 v4, 0 3377; GFX940-NEXT: ;;#ASMSTART 3378; GFX940-NEXT: ; def v[2:3] 3379; GFX940-NEXT: ;;#ASMEND 3380; GFX940-NEXT: s_nop 0 3381; GFX940-NEXT: v_perm_b32 v0, v3, v0, s2 3382; GFX940-NEXT: global_store_dword v4, v0, s[0:1] sc0 sc1 3383; GFX940-NEXT: s_waitcnt vmcnt(0) 3384; GFX940-NEXT: s_setpc_b64 s[30:31] 3385 %vec0 = call <4 x bfloat> asm "; def $0", "=v"() 3386 %vec1 = call <4 x bfloat> asm "; def $0", "=v"() 3387 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 7> 3388 store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4 3389 ret void 3390} 3391 3392define void @v_shuffle_v2bf16_v4bf16__2_7(ptr addrspace(1) inreg 
%ptr) {
; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__2_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_bfi_b32 v0, s4, v1, v3
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__2_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v1, v3
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__2_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v1, v3
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 7>
  store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; VGPR operands ("=v" asm defs); the shuffle result is stored to %ptr.
; Mask <3, 7>: lane 0 = %vec0[3], lane 1 = %vec1[3].
define void @v_shuffle_v2bf16_v4bf16__3_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__3_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v4, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[2:3]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    v_perm_b32 v0, v3, v1, s4
; GFX900-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__3_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v4, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[2:3]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    v_perm_b32 v0, v3, v1, s4
; GFX90A-NEXT:    global_store_dword v4, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__3_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v4, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[2:3]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    v_perm_b32 v0, v3, v1, s2
; GFX940-NEXT:    global_store_dword v4, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 7>
  store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <4, 7>: lane 0 = %vec1[0], lane 1 = %vec1[3]. %vec0 is not selected,
; and the checks show a single asm def.
define void @v_shuffle_v2bf16_v4bf16__4_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__4_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0xffff
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_bfi_b32 v0, s4, v0, v1
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__4_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0xffff
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_bfi_b32 v0, s4, v0, v1
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__4_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0xffff
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_bfi_b32 v0, s2, v0, v1
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 7>
  store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <5, 7>: lane 0 = %vec1[1], lane 1 = %vec1[3]. %vec0 is not selected,
; and the checks show a single asm def.
define void @v_shuffle_v2bf16_v4bf16__5_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__5_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s4, 0x7060302
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    v_perm_b32 v0, v1, v0, s4
; GFX900-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__5_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s4, 0x7060302
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    v_perm_b32 v0, v1, v0, s4
; GFX90A-NEXT:    global_store_dword v2, v0, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__5_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s2, 0x7060302
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    v_perm_b32 v0, v1, v0, s2
; GFX940-NEXT:    global_store_dword v2, v0, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 7>
  store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; Mask <6, 7>: lane 0 = %vec1[2], lane 1 = %vec1[3]. The checks store the
; second dword of the single asm def (v1) without any extra ALU instruction.
define void @v_shuffle_v2bf16_v4bf16__6_7(ptr addrspace(1) inreg %ptr) {
; GFX900-LABEL: v_shuffle_v2bf16_v4bf16__6_7:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    v_mov_b32_e32 v2, 0
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def v[0:1]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX900-NEXT:    s_waitcnt vmcnt(0)
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: v_shuffle_v2bf16_v4bf16__6_7:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    v_mov_b32_e32 v2, 0
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def v[0:1]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    global_store_dword v2, v1, s[16:17]
; GFX90A-NEXT:    s_waitcnt vmcnt(0)
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: v_shuffle_v2bf16_v4bf16__6_7:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    v_mov_b32_e32 v2, 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def v[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    global_store_dword v2, v1, s[0:1] sc0 sc1
; GFX940-NEXT:    s_waitcnt vmcnt(0)
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=v"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=v"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 7>
  store <2 x bfloat> %shuf, ptr addrspace(1) %ptr, align 4
  ret void
}

; SGPR variants start here: operands are "=s" asm defs and the shuffle result
; is consumed by an inline asm that pins it to s8.
; Fully poison mask: the checks contain only the s8 use, no def.
define void @s_shuffle_v2bf16_v4bf16__u_u() {
; GFX9-LABEL: s_shuffle_v2bf16_v4bf16__u_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s8
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> poison
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <0, poison>: lane 0 = %vec0[0]. The checks define the input directly in
; s[8:9], so no copy into s8 is needed.
define void @s_shuffle_v2bf16_v4bf16__0_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[8:9]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[8:9]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[8:9]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_nop 0
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <1, poison>: lane 0 = %vec0[1]; the checks shift the low dword right
; by 16.
define void @s_shuffle_v2bf16_v4bf16__1_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <2, poison>: lane 0 = %vec0[2]; the checks copy the high dword into s8.
define void @s_shuffle_v2bf16_v4bf16__2_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <3, poison>: lane 0 = %vec0[3]; the checks shift the high dword right
; by 16.
define void @s_shuffle_v2bf16_v4bf16__3_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <4, poison> with a poison second operand: index 4 selects element 0 of
; that poison vector, so %vec0 is never read and the checks contain no def.
define void @s_shuffle_v2bf16_v4bf16__4_u() {
; GFX9-LABEL: s_shuffle_v2bf16_v4bf16__4_u:
; GFX9:       ; %bb.0:
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    ;;#ASMSTART
; GFX9-NEXT:    ; use s8
; GFX9-NEXT:    ;;#ASMEND
; GFX9-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <5, poison>: lane 0 = %vec1[1]. %vec0 is not selected, and the checks
; show a single asm def.
define void @s_shuffle_v2bf16_v4bf16__5_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s8, s4, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s8, s4, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s8, s0, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <6, poison>: lane 0 = %vec1[2]. %vec0 is not selected, and the checks
; show a single asm def.
define void @s_shuffle_v2bf16_v4bf16__6_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_mov_b32 s8, s5
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_mov_b32 s8, s5
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_mov_b32 s8, s1
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 poison>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Mask <7, poison>: lane 0 = %vec1[3]. %vec0 is not selected, and the checks
; show a single asm def.
define void @s_shuffle_v2bf16_v4bf16__7_u() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__7_u:
; GFX900:       ; %bb.0:
; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; def s[4:5]
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_lshr_b32 s8, s5, 16
; GFX900-NEXT:    ;;#ASMSTART
; GFX900-NEXT:    ; use s8
; GFX900-NEXT:    ;;#ASMEND
; GFX900-NEXT:    s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__7_u:
; GFX90A:       ; %bb.0:
; GFX90A-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; def s[4:5]
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_lshr_b32 s8, s5, 16
; GFX90A-NEXT:    ;;#ASMSTART
; GFX90A-NEXT:    ; use s8
; GFX90A-NEXT:    ;;#ASMEND
; GFX90A-NEXT:    s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__7_u:
; GFX940:       ; %bb.0:
; GFX940-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; def s[0:1]
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_lshr_b32 s8, s1, 16
; GFX940-NEXT:    ;;#ASMSTART
; GFX940-NEXT:    ; use s8
; GFX940-NEXT:    ;;#ASMEND
; GFX940-NEXT:    s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 7, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}
sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4540 ret void 4541} 4542 4543define void @s_shuffle_v2bf16_v4bf16__3_0() { 4544; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_0: 4545; GFX900: ; %bb.0: 4546; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4547; GFX900-NEXT: ;;#ASMSTART 4548; GFX900-NEXT: ; def s[4:5] 4549; GFX900-NEXT: ;;#ASMEND 4550; GFX900-NEXT: s_lshr_b32 s5, s5, 16 4551; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4552; GFX900-NEXT: ;;#ASMSTART 4553; GFX900-NEXT: ; use s8 4554; GFX900-NEXT: ;;#ASMEND 4555; GFX900-NEXT: s_setpc_b64 s[30:31] 4556; 4557; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_0: 4558; GFX90A: ; %bb.0: 4559; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4560; GFX90A-NEXT: ;;#ASMSTART 4561; GFX90A-NEXT: ; def s[4:5] 4562; GFX90A-NEXT: ;;#ASMEND 4563; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 4564; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4565; GFX90A-NEXT: ;;#ASMSTART 4566; GFX90A-NEXT: ; use s8 4567; GFX90A-NEXT: ;;#ASMEND 4568; GFX90A-NEXT: s_setpc_b64 s[30:31] 4569; 4570; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_0: 4571; GFX940: ; %bb.0: 4572; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4573; GFX940-NEXT: ;;#ASMSTART 4574; GFX940-NEXT: ; def s[0:1] 4575; GFX940-NEXT: ;;#ASMEND 4576; GFX940-NEXT: s_lshr_b32 s1, s1, 16 4577; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 4578; GFX940-NEXT: ;;#ASMSTART 4579; GFX940-NEXT: ; use s8 4580; GFX940-NEXT: ;;#ASMEND 4581; GFX940-NEXT: s_setpc_b64 s[30:31] 4582 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4583 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 0> 4584 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4585 ret void 4586} 4587 4588define void @s_shuffle_v2bf16_v4bf16__4_0() { 4589; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_0: 4590; GFX900: ; %bb.0: 4591; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4592; GFX900-NEXT: ;;#ASMSTART 4593; GFX900-NEXT: ; def s[4:5] 4594; GFX900-NEXT: ;;#ASMEND 4595; GFX900-NEXT: 
s_lshl_b32 s8, s4, 16 4596; GFX900-NEXT: ;;#ASMSTART 4597; GFX900-NEXT: ; use s8 4598; GFX900-NEXT: ;;#ASMEND 4599; GFX900-NEXT: s_setpc_b64 s[30:31] 4600; 4601; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_0: 4602; GFX90A: ; %bb.0: 4603; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4604; GFX90A-NEXT: ;;#ASMSTART 4605; GFX90A-NEXT: ; def s[4:5] 4606; GFX90A-NEXT: ;;#ASMEND 4607; GFX90A-NEXT: s_lshl_b32 s8, s4, 16 4608; GFX90A-NEXT: ;;#ASMSTART 4609; GFX90A-NEXT: ; use s8 4610; GFX90A-NEXT: ;;#ASMEND 4611; GFX90A-NEXT: s_setpc_b64 s[30:31] 4612; 4613; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_0: 4614; GFX940: ; %bb.0: 4615; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4616; GFX940-NEXT: ;;#ASMSTART 4617; GFX940-NEXT: ; def s[0:1] 4618; GFX940-NEXT: ;;#ASMEND 4619; GFX940-NEXT: s_lshl_b32 s8, s0, 16 4620; GFX940-NEXT: ;;#ASMSTART 4621; GFX940-NEXT: ; use s8 4622; GFX940-NEXT: ;;#ASMEND 4623; GFX940-NEXT: s_setpc_b64 s[30:31] 4624 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4625 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 0> 4626 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4627 ret void 4628} 4629 4630define void @s_shuffle_v2bf16_v4bf16__5_0() { 4631; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_0: 4632; GFX900: ; %bb.0: 4633; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4634; GFX900-NEXT: ;;#ASMSTART 4635; GFX900-NEXT: ; def s[4:5] 4636; GFX900-NEXT: ;;#ASMEND 4637; GFX900-NEXT: ;;#ASMSTART 4638; GFX900-NEXT: ; def s[6:7] 4639; GFX900-NEXT: ;;#ASMEND 4640; GFX900-NEXT: s_lshr_b32 s5, s6, 16 4641; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4642; GFX900-NEXT: ;;#ASMSTART 4643; GFX900-NEXT: ; use s8 4644; GFX900-NEXT: ;;#ASMEND 4645; GFX900-NEXT: s_setpc_b64 s[30:31] 4646; 4647; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_0: 4648; GFX90A: ; %bb.0: 4649; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4650; GFX90A-NEXT: ;;#ASMSTART 4651; GFX90A-NEXT: ; def s[4:5] 4652; GFX90A-NEXT: 
;;#ASMEND 4653; GFX90A-NEXT: ;;#ASMSTART 4654; GFX90A-NEXT: ; def s[6:7] 4655; GFX90A-NEXT: ;;#ASMEND 4656; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 4657; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4658; GFX90A-NEXT: ;;#ASMSTART 4659; GFX90A-NEXT: ; use s8 4660; GFX90A-NEXT: ;;#ASMEND 4661; GFX90A-NEXT: s_setpc_b64 s[30:31] 4662; 4663; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_0: 4664; GFX940: ; %bb.0: 4665; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4666; GFX940-NEXT: ;;#ASMSTART 4667; GFX940-NEXT: ; def s[0:1] 4668; GFX940-NEXT: ;;#ASMEND 4669; GFX940-NEXT: ;;#ASMSTART 4670; GFX940-NEXT: ; def s[2:3] 4671; GFX940-NEXT: ;;#ASMEND 4672; GFX940-NEXT: s_lshr_b32 s1, s2, 16 4673; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 4674; GFX940-NEXT: ;;#ASMSTART 4675; GFX940-NEXT: ; use s8 4676; GFX940-NEXT: ;;#ASMEND 4677; GFX940-NEXT: s_setpc_b64 s[30:31] 4678 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4679 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 4680 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 0> 4681 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4682 ret void 4683} 4684 4685define void @s_shuffle_v2bf16_v4bf16__6_0() { 4686; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_0: 4687; GFX900: ; %bb.0: 4688; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4689; GFX900-NEXT: ;;#ASMSTART 4690; GFX900-NEXT: ; def s[4:5] 4691; GFX900-NEXT: ;;#ASMEND 4692; GFX900-NEXT: ;;#ASMSTART 4693; GFX900-NEXT: ; def s[6:7] 4694; GFX900-NEXT: ;;#ASMEND 4695; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 4696; GFX900-NEXT: ;;#ASMSTART 4697; GFX900-NEXT: ; use s8 4698; GFX900-NEXT: ;;#ASMEND 4699; GFX900-NEXT: s_setpc_b64 s[30:31] 4700; 4701; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_0: 4702; GFX90A: ; %bb.0: 4703; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4704; GFX90A-NEXT: ;;#ASMSTART 4705; GFX90A-NEXT: ; def s[4:5] 4706; GFX90A-NEXT: ;;#ASMEND 4707; GFX90A-NEXT: ;;#ASMSTART 4708; GFX90A-NEXT: ; def s[6:7] 4709; 
GFX90A-NEXT: ;;#ASMEND 4710; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 4711; GFX90A-NEXT: ;;#ASMSTART 4712; GFX90A-NEXT: ; use s8 4713; GFX90A-NEXT: ;;#ASMEND 4714; GFX90A-NEXT: s_setpc_b64 s[30:31] 4715; 4716; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_0: 4717; GFX940: ; %bb.0: 4718; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4719; GFX940-NEXT: ;;#ASMSTART 4720; GFX940-NEXT: ; def s[0:1] 4721; GFX940-NEXT: ;;#ASMEND 4722; GFX940-NEXT: ;;#ASMSTART 4723; GFX940-NEXT: ; def s[2:3] 4724; GFX940-NEXT: ;;#ASMEND 4725; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 4726; GFX940-NEXT: ;;#ASMSTART 4727; GFX940-NEXT: ; use s8 4728; GFX940-NEXT: ;;#ASMEND 4729; GFX940-NEXT: s_setpc_b64 s[30:31] 4730 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4731 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 4732 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 0> 4733 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4734 ret void 4735} 4736 4737define void @s_shuffle_v2bf16_v4bf16__u_1() { 4738; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_1: 4739; GFX900: ; %bb.0: 4740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4741; GFX900-NEXT: ;;#ASMSTART 4742; GFX900-NEXT: ; def s[8:9] 4743; GFX900-NEXT: ;;#ASMEND 4744; GFX900-NEXT: ;;#ASMSTART 4745; GFX900-NEXT: ; use s8 4746; GFX900-NEXT: ;;#ASMEND 4747; GFX900-NEXT: s_setpc_b64 s[30:31] 4748; 4749; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_1: 4750; GFX90A: ; %bb.0: 4751; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4752; GFX90A-NEXT: ;;#ASMSTART 4753; GFX90A-NEXT: ; def s[8:9] 4754; GFX90A-NEXT: ;;#ASMEND 4755; GFX90A-NEXT: ;;#ASMSTART 4756; GFX90A-NEXT: ; use s8 4757; GFX90A-NEXT: ;;#ASMEND 4758; GFX90A-NEXT: s_setpc_b64 s[30:31] 4759; 4760; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_1: 4761; GFX940: ; %bb.0: 4762; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4763; GFX940-NEXT: ;;#ASMSTART 4764; GFX940-NEXT: ; def s[8:9] 4765; GFX940-NEXT: ;;#ASMEND 4766; GFX940-NEXT: 
s_nop 0 4767; GFX940-NEXT: ;;#ASMSTART 4768; GFX940-NEXT: ; use s8 4769; GFX940-NEXT: ;;#ASMEND 4770; GFX940-NEXT: s_setpc_b64 s[30:31] 4771 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4772 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 1> 4773 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4774 ret void 4775} 4776 4777define void @s_shuffle_v2bf16_v4bf16__0_1() { 4778; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_1: 4779; GFX900: ; %bb.0: 4780; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4781; GFX900-NEXT: ;;#ASMSTART 4782; GFX900-NEXT: ; def s[8:9] 4783; GFX900-NEXT: ;;#ASMEND 4784; GFX900-NEXT: ;;#ASMSTART 4785; GFX900-NEXT: ; use s8 4786; GFX900-NEXT: ;;#ASMEND 4787; GFX900-NEXT: s_setpc_b64 s[30:31] 4788; 4789; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_1: 4790; GFX90A: ; %bb.0: 4791; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4792; GFX90A-NEXT: ;;#ASMSTART 4793; GFX90A-NEXT: ; def s[8:9] 4794; GFX90A-NEXT: ;;#ASMEND 4795; GFX90A-NEXT: ;;#ASMSTART 4796; GFX90A-NEXT: ; use s8 4797; GFX90A-NEXT: ;;#ASMEND 4798; GFX90A-NEXT: s_setpc_b64 s[30:31] 4799; 4800; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_1: 4801; GFX940: ; %bb.0: 4802; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4803; GFX940-NEXT: ;;#ASMSTART 4804; GFX940-NEXT: ; def s[8:9] 4805; GFX940-NEXT: ;;#ASMEND 4806; GFX940-NEXT: s_nop 0 4807; GFX940-NEXT: ;;#ASMSTART 4808; GFX940-NEXT: ; use s8 4809; GFX940-NEXT: ;;#ASMEND 4810; GFX940-NEXT: s_setpc_b64 s[30:31] 4811 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4812 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 1> 4813 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4814 ret void 4815} 4816 4817define void @s_shuffle_v2bf16_v4bf16__1_1() { 4818; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_1: 4819; GFX900: ; %bb.0: 4820; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4821; GFX900-NEXT: ;;#ASMSTART 4822; GFX900-NEXT: 
; def s[4:5] 4823; GFX900-NEXT: ;;#ASMEND 4824; GFX900-NEXT: s_lshr_b32 s4, s4, 16 4825; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 4826; GFX900-NEXT: ;;#ASMSTART 4827; GFX900-NEXT: ; use s8 4828; GFX900-NEXT: ;;#ASMEND 4829; GFX900-NEXT: s_setpc_b64 s[30:31] 4830; 4831; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_1: 4832; GFX90A: ; %bb.0: 4833; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4834; GFX90A-NEXT: ;;#ASMSTART 4835; GFX90A-NEXT: ; def s[4:5] 4836; GFX90A-NEXT: ;;#ASMEND 4837; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 4838; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 4839; GFX90A-NEXT: ;;#ASMSTART 4840; GFX90A-NEXT: ; use s8 4841; GFX90A-NEXT: ;;#ASMEND 4842; GFX90A-NEXT: s_setpc_b64 s[30:31] 4843; 4844; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_1: 4845; GFX940: ; %bb.0: 4846; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4847; GFX940-NEXT: ;;#ASMSTART 4848; GFX940-NEXT: ; def s[0:1] 4849; GFX940-NEXT: ;;#ASMEND 4850; GFX940-NEXT: s_lshr_b32 s0, s0, 16 4851; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 4852; GFX940-NEXT: ;;#ASMSTART 4853; GFX940-NEXT: ; use s8 4854; GFX940-NEXT: ;;#ASMEND 4855; GFX940-NEXT: s_setpc_b64 s[30:31] 4856 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4857 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 1> 4858 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4859 ret void 4860} 4861 4862define void @s_shuffle_v2bf16_v4bf16__2_1() { 4863; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_1: 4864; GFX900: ; %bb.0: 4865; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4866; GFX900-NEXT: ;;#ASMSTART 4867; GFX900-NEXT: ; def s[4:5] 4868; GFX900-NEXT: ;;#ASMEND 4869; GFX900-NEXT: s_lshr_b32 s4, s4, 16 4870; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4871; GFX900-NEXT: ;;#ASMSTART 4872; GFX900-NEXT: ; use s8 4873; GFX900-NEXT: ;;#ASMEND 4874; GFX900-NEXT: s_setpc_b64 s[30:31] 4875; 4876; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_1: 4877; GFX90A: ; %bb.0: 4878; GFX90A-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 4879; GFX90A-NEXT: ;;#ASMSTART 4880; GFX90A-NEXT: ; def s[4:5] 4881; GFX90A-NEXT: ;;#ASMEND 4882; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 4883; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4884; GFX90A-NEXT: ;;#ASMSTART 4885; GFX90A-NEXT: ; use s8 4886; GFX90A-NEXT: ;;#ASMEND 4887; GFX90A-NEXT: s_setpc_b64 s[30:31] 4888; 4889; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_1: 4890; GFX940: ; %bb.0: 4891; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4892; GFX940-NEXT: ;;#ASMSTART 4893; GFX940-NEXT: ; def s[0:1] 4894; GFX940-NEXT: ;;#ASMEND 4895; GFX940-NEXT: s_lshr_b32 s0, s0, 16 4896; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 4897; GFX940-NEXT: ;;#ASMSTART 4898; GFX940-NEXT: ; use s8 4899; GFX940-NEXT: ;;#ASMEND 4900; GFX940-NEXT: s_setpc_b64 s[30:31] 4901 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4902 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 1> 4903 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4904 ret void 4905} 4906 4907define void @s_shuffle_v2bf16_v4bf16__3_1() { 4908; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_1: 4909; GFX900: ; %bb.0: 4910; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4911; GFX900-NEXT: ;;#ASMSTART 4912; GFX900-NEXT: ; def s[4:5] 4913; GFX900-NEXT: ;;#ASMEND 4914; GFX900-NEXT: s_lshr_b32 s4, s4, 16 4915; GFX900-NEXT: s_lshr_b32 s5, s5, 16 4916; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4917; GFX900-NEXT: ;;#ASMSTART 4918; GFX900-NEXT: ; use s8 4919; GFX900-NEXT: ;;#ASMEND 4920; GFX900-NEXT: s_setpc_b64 s[30:31] 4921; 4922; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_1: 4923; GFX90A: ; %bb.0: 4924; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4925; GFX90A-NEXT: ;;#ASMSTART 4926; GFX90A-NEXT: ; def s[4:5] 4927; GFX90A-NEXT: ;;#ASMEND 4928; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 4929; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 4930; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 4931; GFX90A-NEXT: ;;#ASMSTART 4932; GFX90A-NEXT: ; use s8 4933; GFX90A-NEXT: 
;;#ASMEND 4934; GFX90A-NEXT: s_setpc_b64 s[30:31] 4935; 4936; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_1: 4937; GFX940: ; %bb.0: 4938; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4939; GFX940-NEXT: ;;#ASMSTART 4940; GFX940-NEXT: ; def s[0:1] 4941; GFX940-NEXT: ;;#ASMEND 4942; GFX940-NEXT: s_lshr_b32 s0, s0, 16 4943; GFX940-NEXT: s_lshr_b32 s1, s1, 16 4944; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 4945; GFX940-NEXT: ;;#ASMSTART 4946; GFX940-NEXT: ; use s8 4947; GFX940-NEXT: ;;#ASMEND 4948; GFX940-NEXT: s_setpc_b64 s[30:31] 4949 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4950 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 1> 4951 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4952 ret void 4953} 4954 4955define void @s_shuffle_v2bf16_v4bf16__4_1() { 4956; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_1: 4957; GFX900: ; %bb.0: 4958; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4959; GFX900-NEXT: ;;#ASMSTART 4960; GFX900-NEXT: ; def s[8:9] 4961; GFX900-NEXT: ;;#ASMEND 4962; GFX900-NEXT: ;;#ASMSTART 4963; GFX900-NEXT: ; use s8 4964; GFX900-NEXT: ;;#ASMEND 4965; GFX900-NEXT: s_setpc_b64 s[30:31] 4966; 4967; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_1: 4968; GFX90A: ; %bb.0: 4969; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4970; GFX90A-NEXT: ;;#ASMSTART 4971; GFX90A-NEXT: ; def s[8:9] 4972; GFX90A-NEXT: ;;#ASMEND 4973; GFX90A-NEXT: ;;#ASMSTART 4974; GFX90A-NEXT: ; use s8 4975; GFX90A-NEXT: ;;#ASMEND 4976; GFX90A-NEXT: s_setpc_b64 s[30:31] 4977; 4978; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_1: 4979; GFX940: ; %bb.0: 4980; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4981; GFX940-NEXT: ;;#ASMSTART 4982; GFX940-NEXT: ; def s[8:9] 4983; GFX940-NEXT: ;;#ASMEND 4984; GFX940-NEXT: s_nop 0 4985; GFX940-NEXT: ;;#ASMSTART 4986; GFX940-NEXT: ; use s8 4987; GFX940-NEXT: ;;#ASMEND 4988; GFX940-NEXT: s_setpc_b64 s[30:31] 4989 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 4990 %shuf = 
shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 1> 4991 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 4992 ret void 4993} 4994 4995define void @s_shuffle_v2bf16_v4bf16__5_1() { 4996; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_1: 4997; GFX900: ; %bb.0: 4998; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 4999; GFX900-NEXT: ;;#ASMSTART 5000; GFX900-NEXT: ; def s[4:5] 5001; GFX900-NEXT: ;;#ASMEND 5002; GFX900-NEXT: ;;#ASMSTART 5003; GFX900-NEXT: ; def s[6:7] 5004; GFX900-NEXT: ;;#ASMEND 5005; GFX900-NEXT: s_lshr_b32 s4, s4, 16 5006; GFX900-NEXT: s_lshr_b32 s5, s6, 16 5007; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5008; GFX900-NEXT: ;;#ASMSTART 5009; GFX900-NEXT: ; use s8 5010; GFX900-NEXT: ;;#ASMEND 5011; GFX900-NEXT: s_setpc_b64 s[30:31] 5012; 5013; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_1: 5014; GFX90A: ; %bb.0: 5015; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5016; GFX90A-NEXT: ;;#ASMSTART 5017; GFX90A-NEXT: ; def s[4:5] 5018; GFX90A-NEXT: ;;#ASMEND 5019; GFX90A-NEXT: ;;#ASMSTART 5020; GFX90A-NEXT: ; def s[6:7] 5021; GFX90A-NEXT: ;;#ASMEND 5022; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 5023; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 5024; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5025; GFX90A-NEXT: ;;#ASMSTART 5026; GFX90A-NEXT: ; use s8 5027; GFX90A-NEXT: ;;#ASMEND 5028; GFX90A-NEXT: s_setpc_b64 s[30:31] 5029; 5030; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_1: 5031; GFX940: ; %bb.0: 5032; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5033; GFX940-NEXT: ;;#ASMSTART 5034; GFX940-NEXT: ; def s[0:1] 5035; GFX940-NEXT: ;;#ASMEND 5036; GFX940-NEXT: ;;#ASMSTART 5037; GFX940-NEXT: ; def s[2:3] 5038; GFX940-NEXT: ;;#ASMEND 5039; GFX940-NEXT: s_lshr_b32 s0, s0, 16 5040; GFX940-NEXT: s_lshr_b32 s1, s2, 16 5041; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5042; GFX940-NEXT: ;;#ASMSTART 5043; GFX940-NEXT: ; use s8 5044; GFX940-NEXT: ;;#ASMEND 5045; GFX940-NEXT: s_setpc_b64 s[30:31] 5046 %vec0 = call <4 x bfloat> asm "; def 
$0", "=s"() 5047 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 5048 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 1> 5049 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5050 ret void 5051} 5052 5053define void @s_shuffle_v2bf16_v4bf16__6_1() { 5054; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_1: 5055; GFX900: ; %bb.0: 5056; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5057; GFX900-NEXT: ;;#ASMSTART 5058; GFX900-NEXT: ; def s[4:5] 5059; GFX900-NEXT: ;;#ASMEND 5060; GFX900-NEXT: s_lshr_b32 s4, s4, 16 5061; GFX900-NEXT: ;;#ASMSTART 5062; GFX900-NEXT: ; def s[6:7] 5063; GFX900-NEXT: ;;#ASMEND 5064; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5065; GFX900-NEXT: ;;#ASMSTART 5066; GFX900-NEXT: ; use s8 5067; GFX900-NEXT: ;;#ASMEND 5068; GFX900-NEXT: s_setpc_b64 s[30:31] 5069; 5070; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_1: 5071; GFX90A: ; %bb.0: 5072; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5073; GFX90A-NEXT: ;;#ASMSTART 5074; GFX90A-NEXT: ; def s[4:5] 5075; GFX90A-NEXT: ;;#ASMEND 5076; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 5077; GFX90A-NEXT: ;;#ASMSTART 5078; GFX90A-NEXT: ; def s[6:7] 5079; GFX90A-NEXT: ;;#ASMEND 5080; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5081; GFX90A-NEXT: ;;#ASMSTART 5082; GFX90A-NEXT: ; use s8 5083; GFX90A-NEXT: ;;#ASMEND 5084; GFX90A-NEXT: s_setpc_b64 s[30:31] 5085; 5086; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_1: 5087; GFX940: ; %bb.0: 5088; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5089; GFX940-NEXT: ;;#ASMSTART 5090; GFX940-NEXT: ; def s[0:1] 5091; GFX940-NEXT: ;;#ASMEND 5092; GFX940-NEXT: s_lshr_b32 s0, s0, 16 5093; GFX940-NEXT: ;;#ASMSTART 5094; GFX940-NEXT: ; def s[2:3] 5095; GFX940-NEXT: ;;#ASMEND 5096; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 5097; GFX940-NEXT: ;;#ASMSTART 5098; GFX940-NEXT: ; use s8 5099; GFX940-NEXT: ;;#ASMEND 5100; GFX940-NEXT: s_setpc_b64 s[30:31] 5101 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5102 %vec1 = call <4 x bfloat> 
asm "; def $0", "=s"() 5103 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 1> 5104 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5105 ret void 5106} 5107 5108define void @s_shuffle_v2bf16_v4bf16__u_2() { 5109; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_2: 5110; GFX900: ; %bb.0: 5111; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5112; GFX900-NEXT: ;;#ASMSTART 5113; GFX900-NEXT: ; def s[4:5] 5114; GFX900-NEXT: ;;#ASMEND 5115; GFX900-NEXT: s_lshl_b32 s8, s5, 16 5116; GFX900-NEXT: ;;#ASMSTART 5117; GFX900-NEXT: ; use s8 5118; GFX900-NEXT: ;;#ASMEND 5119; GFX900-NEXT: s_setpc_b64 s[30:31] 5120; 5121; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_2: 5122; GFX90A: ; %bb.0: 5123; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5124; GFX90A-NEXT: ;;#ASMSTART 5125; GFX90A-NEXT: ; def s[4:5] 5126; GFX90A-NEXT: ;;#ASMEND 5127; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 5128; GFX90A-NEXT: ;;#ASMSTART 5129; GFX90A-NEXT: ; use s8 5130; GFX90A-NEXT: ;;#ASMEND 5131; GFX90A-NEXT: s_setpc_b64 s[30:31] 5132; 5133; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_2: 5134; GFX940: ; %bb.0: 5135; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5136; GFX940-NEXT: ;;#ASMSTART 5137; GFX940-NEXT: ; def s[0:1] 5138; GFX940-NEXT: ;;#ASMEND 5139; GFX940-NEXT: s_lshl_b32 s8, s1, 16 5140; GFX940-NEXT: ;;#ASMSTART 5141; GFX940-NEXT: ; use s8 5142; GFX940-NEXT: ;;#ASMEND 5143; GFX940-NEXT: s_setpc_b64 s[30:31] 5144 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5145 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 2> 5146 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5147 ret void 5148} 5149 5150define void @s_shuffle_v2bf16_v4bf16__0_2() { 5151; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_2: 5152; GFX900: ; %bb.0: 5153; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5154; GFX900-NEXT: ;;#ASMSTART 5155; GFX900-NEXT: ; def s[4:5] 5156; GFX900-NEXT: ;;#ASMEND 5157; GFX900-NEXT: 
s_pack_ll_b32_b16 s8, s4, s5 5158; GFX900-NEXT: ;;#ASMSTART 5159; GFX900-NEXT: ; use s8 5160; GFX900-NEXT: ;;#ASMEND 5161; GFX900-NEXT: s_setpc_b64 s[30:31] 5162; 5163; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_2: 5164; GFX90A: ; %bb.0: 5165; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5166; GFX90A-NEXT: ;;#ASMSTART 5167; GFX90A-NEXT: ; def s[4:5] 5168; GFX90A-NEXT: ;;#ASMEND 5169; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5170; GFX90A-NEXT: ;;#ASMSTART 5171; GFX90A-NEXT: ; use s8 5172; GFX90A-NEXT: ;;#ASMEND 5173; GFX90A-NEXT: s_setpc_b64 s[30:31] 5174; 5175; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_2: 5176; GFX940: ; %bb.0: 5177; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5178; GFX940-NEXT: ;;#ASMSTART 5179; GFX940-NEXT: ; def s[0:1] 5180; GFX940-NEXT: ;;#ASMEND 5181; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5182; GFX940-NEXT: ;;#ASMSTART 5183; GFX940-NEXT: ; use s8 5184; GFX940-NEXT: ;;#ASMEND 5185; GFX940-NEXT: s_setpc_b64 s[30:31] 5186 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5187 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 2> 5188 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5189 ret void 5190} 5191 5192define void @s_shuffle_v2bf16_v4bf16__1_2() { 5193; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_2: 5194; GFX900: ; %bb.0: 5195; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5196; GFX900-NEXT: ;;#ASMSTART 5197; GFX900-NEXT: ; def s[4:5] 5198; GFX900-NEXT: ;;#ASMEND 5199; GFX900-NEXT: s_lshr_b32 s4, s4, 16 5200; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5201; GFX900-NEXT: ;;#ASMSTART 5202; GFX900-NEXT: ; use s8 5203; GFX900-NEXT: ;;#ASMEND 5204; GFX900-NEXT: s_setpc_b64 s[30:31] 5205; 5206; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_2: 5207; GFX90A: ; %bb.0: 5208; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5209; GFX90A-NEXT: ;;#ASMSTART 5210; GFX90A-NEXT: ; def s[4:5] 5211; GFX90A-NEXT: ;;#ASMEND 5212; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 5213; GFX90A-NEXT: 
s_pack_ll_b32_b16 s8, s4, s5 5214; GFX90A-NEXT: ;;#ASMSTART 5215; GFX90A-NEXT: ; use s8 5216; GFX90A-NEXT: ;;#ASMEND 5217; GFX90A-NEXT: s_setpc_b64 s[30:31] 5218; 5219; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_2: 5220; GFX940: ; %bb.0: 5221; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5222; GFX940-NEXT: ;;#ASMSTART 5223; GFX940-NEXT: ; def s[0:1] 5224; GFX940-NEXT: ;;#ASMEND 5225; GFX940-NEXT: s_lshr_b32 s0, s0, 16 5226; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5227; GFX940-NEXT: ;;#ASMSTART 5228; GFX940-NEXT: ; use s8 5229; GFX940-NEXT: ;;#ASMEND 5230; GFX940-NEXT: s_setpc_b64 s[30:31] 5231 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5232 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 1, i32 2> 5233 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5234 ret void 5235} 5236 5237define void @s_shuffle_v2bf16_v4bf16__2_2() { 5238; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_2: 5239; GFX900: ; %bb.0: 5240; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5241; GFX900-NEXT: ;;#ASMSTART 5242; GFX900-NEXT: ; def s[4:5] 5243; GFX900-NEXT: ;;#ASMEND 5244; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 5245; GFX900-NEXT: ;;#ASMSTART 5246; GFX900-NEXT: ; use s8 5247; GFX900-NEXT: ;;#ASMEND 5248; GFX900-NEXT: s_setpc_b64 s[30:31] 5249; 5250; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_2: 5251; GFX90A: ; %bb.0: 5252; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5253; GFX90A-NEXT: ;;#ASMSTART 5254; GFX90A-NEXT: ; def s[4:5] 5255; GFX90A-NEXT: ;;#ASMEND 5256; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 5257; GFX90A-NEXT: ;;#ASMSTART 5258; GFX90A-NEXT: ; use s8 5259; GFX90A-NEXT: ;;#ASMEND 5260; GFX90A-NEXT: s_setpc_b64 s[30:31] 5261; 5262; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_2: 5263; GFX940: ; %bb.0: 5264; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5265; GFX940-NEXT: ;;#ASMSTART 5266; GFX940-NEXT: ; def s[0:1] 5267; GFX940-NEXT: ;;#ASMEND 5268; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 5269; GFX940-NEXT: 
;;#ASMSTART 5270; GFX940-NEXT: ; use s8 5271; GFX940-NEXT: ;;#ASMEND 5272; GFX940-NEXT: s_setpc_b64 s[30:31] 5273 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5274 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 2> 5275 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5276 ret void 5277} 5278 5279define void @s_shuffle_v2bf16_v4bf16__3_2() { 5280; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_2: 5281; GFX900: ; %bb.0: 5282; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5283; GFX900-NEXT: ;;#ASMSTART 5284; GFX900-NEXT: ; def s[4:5] 5285; GFX900-NEXT: ;;#ASMEND 5286; GFX900-NEXT: s_lshr_b32 s4, s5, 16 5287; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5288; GFX900-NEXT: ;;#ASMSTART 5289; GFX900-NEXT: ; use s8 5290; GFX900-NEXT: ;;#ASMEND 5291; GFX900-NEXT: s_setpc_b64 s[30:31] 5292; 5293; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_2: 5294; GFX90A: ; %bb.0: 5295; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5296; GFX90A-NEXT: ;;#ASMSTART 5297; GFX90A-NEXT: ; def s[4:5] 5298; GFX90A-NEXT: ;;#ASMEND 5299; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 5300; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5301; GFX90A-NEXT: ;;#ASMSTART 5302; GFX90A-NEXT: ; use s8 5303; GFX90A-NEXT: ;;#ASMEND 5304; GFX90A-NEXT: s_setpc_b64 s[30:31] 5305; 5306; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_2: 5307; GFX940: ; %bb.0: 5308; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5309; GFX940-NEXT: ;;#ASMSTART 5310; GFX940-NEXT: ; def s[0:1] 5311; GFX940-NEXT: ;;#ASMEND 5312; GFX940-NEXT: s_lshr_b32 s0, s1, 16 5313; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5314; GFX940-NEXT: ;;#ASMSTART 5315; GFX940-NEXT: ; use s8 5316; GFX940-NEXT: ;;#ASMEND 5317; GFX940-NEXT: s_setpc_b64 s[30:31] 5318 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5319 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 2> 5320 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5321 ret void 5322} 5323 5324define void 
@s_shuffle_v2bf16_v4bf16__4_2() { 5325; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_2: 5326; GFX900: ; %bb.0: 5327; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5328; GFX900-NEXT: ;;#ASMSTART 5329; GFX900-NEXT: ; def s[4:5] 5330; GFX900-NEXT: ;;#ASMEND 5331; GFX900-NEXT: s_lshl_b32 s8, s5, 16 5332; GFX900-NEXT: ;;#ASMSTART 5333; GFX900-NEXT: ; use s8 5334; GFX900-NEXT: ;;#ASMEND 5335; GFX900-NEXT: s_setpc_b64 s[30:31] 5336; 5337; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_2: 5338; GFX90A: ; %bb.0: 5339; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5340; GFX90A-NEXT: ;;#ASMSTART 5341; GFX90A-NEXT: ; def s[4:5] 5342; GFX90A-NEXT: ;;#ASMEND 5343; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 5344; GFX90A-NEXT: ;;#ASMSTART 5345; GFX90A-NEXT: ; use s8 5346; GFX90A-NEXT: ;;#ASMEND 5347; GFX90A-NEXT: s_setpc_b64 s[30:31] 5348; 5349; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_2: 5350; GFX940: ; %bb.0: 5351; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5352; GFX940-NEXT: ;;#ASMSTART 5353; GFX940-NEXT: ; def s[0:1] 5354; GFX940-NEXT: ;;#ASMEND 5355; GFX940-NEXT: s_lshl_b32 s8, s1, 16 5356; GFX940-NEXT: ;;#ASMSTART 5357; GFX940-NEXT: ; use s8 5358; GFX940-NEXT: ;;#ASMEND 5359; GFX940-NEXT: s_setpc_b64 s[30:31] 5360 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5361 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 2> 5362 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5363 ret void 5364} 5365 5366define void @s_shuffle_v2bf16_v4bf16__5_2() { 5367; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_2: 5368; GFX900: ; %bb.0: 5369; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5370; GFX900-NEXT: ;;#ASMSTART 5371; GFX900-NEXT: ; def s[4:5] 5372; GFX900-NEXT: ;;#ASMEND 5373; GFX900-NEXT: ;;#ASMSTART 5374; GFX900-NEXT: ; def s[6:7] 5375; GFX900-NEXT: ;;#ASMEND 5376; GFX900-NEXT: s_lshr_b32 s4, s6, 16 5377; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5378; GFX900-NEXT: ;;#ASMSTART 5379; GFX900-NEXT: ; use s8 5380; GFX900-NEXT: 
;;#ASMEND 5381; GFX900-NEXT: s_setpc_b64 s[30:31] 5382; 5383; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_2: 5384; GFX90A: ; %bb.0: 5385; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5386; GFX90A-NEXT: ;;#ASMSTART 5387; GFX90A-NEXT: ; def s[4:5] 5388; GFX90A-NEXT: ;;#ASMEND 5389; GFX90A-NEXT: ;;#ASMSTART 5390; GFX90A-NEXT: ; def s[6:7] 5391; GFX90A-NEXT: ;;#ASMEND 5392; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 5393; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5394; GFX90A-NEXT: ;;#ASMSTART 5395; GFX90A-NEXT: ; use s8 5396; GFX90A-NEXT: ;;#ASMEND 5397; GFX90A-NEXT: s_setpc_b64 s[30:31] 5398; 5399; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_2: 5400; GFX940: ; %bb.0: 5401; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5402; GFX940-NEXT: ;;#ASMSTART 5403; GFX940-NEXT: ; def s[0:1] 5404; GFX940-NEXT: ;;#ASMEND 5405; GFX940-NEXT: ;;#ASMSTART 5406; GFX940-NEXT: ; def s[2:3] 5407; GFX940-NEXT: ;;#ASMEND 5408; GFX940-NEXT: s_lshr_b32 s0, s2, 16 5409; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5410; GFX940-NEXT: ;;#ASMSTART 5411; GFX940-NEXT: ; use s8 5412; GFX940-NEXT: ;;#ASMEND 5413; GFX940-NEXT: s_setpc_b64 s[30:31] 5414 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5415 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 5416 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 2> 5417 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5418 ret void 5419} 5420 5421define void @s_shuffle_v2bf16_v4bf16__6_2() { 5422; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_2: 5423; GFX900: ; %bb.0: 5424; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5425; GFX900-NEXT: ;;#ASMSTART 5426; GFX900-NEXT: ; def s[4:5] 5427; GFX900-NEXT: ;;#ASMEND 5428; GFX900-NEXT: ;;#ASMSTART 5429; GFX900-NEXT: ; def s[6:7] 5430; GFX900-NEXT: ;;#ASMEND 5431; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s5 5432; GFX900-NEXT: ;;#ASMSTART 5433; GFX900-NEXT: ; use s8 5434; GFX900-NEXT: ;;#ASMEND 5435; GFX900-NEXT: s_setpc_b64 s[30:31] 5436; 5437; GFX90A-LABEL: 
s_shuffle_v2bf16_v4bf16__6_2: 5438; GFX90A: ; %bb.0: 5439; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5440; GFX90A-NEXT: ;;#ASMSTART 5441; GFX90A-NEXT: ; def s[4:5] 5442; GFX90A-NEXT: ;;#ASMEND 5443; GFX90A-NEXT: ;;#ASMSTART 5444; GFX90A-NEXT: ; def s[6:7] 5445; GFX90A-NEXT: ;;#ASMEND 5446; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s5 5447; GFX90A-NEXT: ;;#ASMSTART 5448; GFX90A-NEXT: ; use s8 5449; GFX90A-NEXT: ;;#ASMEND 5450; GFX90A-NEXT: s_setpc_b64 s[30:31] 5451; 5452; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_2: 5453; GFX940: ; %bb.0: 5454; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5455; GFX940-NEXT: ;;#ASMSTART 5456; GFX940-NEXT: ; def s[0:1] 5457; GFX940-NEXT: ;;#ASMEND 5458; GFX940-NEXT: ;;#ASMSTART 5459; GFX940-NEXT: ; def s[2:3] 5460; GFX940-NEXT: ;;#ASMEND 5461; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s1 5462; GFX940-NEXT: ;;#ASMSTART 5463; GFX940-NEXT: ; use s8 5464; GFX940-NEXT: ;;#ASMEND 5465; GFX940-NEXT: s_setpc_b64 s[30:31] 5466 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5467 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 5468 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 2> 5469 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5470 ret void 5471} 5472 5473define void @s_shuffle_v2bf16_v4bf16__u_3() { 5474; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_3: 5475; GFX900: ; %bb.0: 5476; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5477; GFX900-NEXT: ;;#ASMSTART 5478; GFX900-NEXT: ; def s[4:5] 5479; GFX900-NEXT: ;;#ASMEND 5480; GFX900-NEXT: s_mov_b32 s8, s5 5481; GFX900-NEXT: ;;#ASMSTART 5482; GFX900-NEXT: ; use s8 5483; GFX900-NEXT: ;;#ASMEND 5484; GFX900-NEXT: s_setpc_b64 s[30:31] 5485; 5486; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_3: 5487; GFX90A: ; %bb.0: 5488; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5489; GFX90A-NEXT: ;;#ASMSTART 5490; GFX90A-NEXT: ; def s[4:5] 5491; GFX90A-NEXT: ;;#ASMEND 5492; GFX90A-NEXT: s_mov_b32 s8, s5 5493; GFX90A-NEXT: ;;#ASMSTART 
5494; GFX90A-NEXT: ; use s8 5495; GFX90A-NEXT: ;;#ASMEND 5496; GFX90A-NEXT: s_setpc_b64 s[30:31] 5497; 5498; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_3: 5499; GFX940: ; %bb.0: 5500; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5501; GFX940-NEXT: ;;#ASMSTART 5502; GFX940-NEXT: ; def s[0:1] 5503; GFX940-NEXT: ;;#ASMEND 5504; GFX940-NEXT: s_mov_b32 s8, s1 5505; GFX940-NEXT: ;;#ASMSTART 5506; GFX940-NEXT: ; use s8 5507; GFX940-NEXT: ;;#ASMEND 5508; GFX940-NEXT: s_setpc_b64 s[30:31] 5509 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5510 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 3> 5511 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5512 ret void 5513} 5514 5515define void @s_shuffle_v2bf16_v4bf16__0_3() { 5516; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_3: 5517; GFX900: ; %bb.0: 5518; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5519; GFX900-NEXT: ;;#ASMSTART 5520; GFX900-NEXT: ; def s[4:5] 5521; GFX900-NEXT: ;;#ASMEND 5522; GFX900-NEXT: s_lshr_b32 s5, s5, 16 5523; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5524; GFX900-NEXT: ;;#ASMSTART 5525; GFX900-NEXT: ; use s8 5526; GFX900-NEXT: ;;#ASMEND 5527; GFX900-NEXT: s_setpc_b64 s[30:31] 5528; 5529; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_3: 5530; GFX90A: ; %bb.0: 5531; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5532; GFX90A-NEXT: ;;#ASMSTART 5533; GFX90A-NEXT: ; def s[4:5] 5534; GFX90A-NEXT: ;;#ASMEND 5535; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 5536; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5537; GFX90A-NEXT: ;;#ASMSTART 5538; GFX90A-NEXT: ; use s8 5539; GFX90A-NEXT: ;;#ASMEND 5540; GFX90A-NEXT: s_setpc_b64 s[30:31] 5541; 5542; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_3: 5543; GFX940: ; %bb.0: 5544; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5545; GFX940-NEXT: ;;#ASMSTART 5546; GFX940-NEXT: ; def s[0:1] 5547; GFX940-NEXT: ;;#ASMEND 5548; GFX940-NEXT: s_lshr_b32 s1, s1, 16 5549; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5550; 
GFX940-NEXT: ;;#ASMSTART 5551; GFX940-NEXT: ; use s8 5552; GFX940-NEXT: ;;#ASMEND 5553; GFX940-NEXT: s_setpc_b64 s[30:31] 5554 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5555 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 3> 5556 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5557 ret void 5558} 5559 5560define void @s_shuffle_v2bf16_v4bf16__1_3() { 5561; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_3: 5562; GFX900: ; %bb.0: 5563; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5564; GFX900-NEXT: ;;#ASMSTART 5565; GFX900-NEXT: ; def s[4:5] 5566; GFX900-NEXT: ;;#ASMEND 5567; GFX900-NEXT: s_lshr_b32 s5, s5, 16 5568; GFX900-NEXT: s_lshr_b32 s4, s4, 16 5569; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5570; GFX900-NEXT: ;;#ASMSTART 5571; GFX900-NEXT: ; use s8 5572; GFX900-NEXT: ;;#ASMEND 5573; GFX900-NEXT: s_setpc_b64 s[30:31] 5574; 5575; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_3: 5576; GFX90A: ; %bb.0: 5577; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5578; GFX90A-NEXT: ;;#ASMSTART 5579; GFX90A-NEXT: ; def s[4:5] 5580; GFX90A-NEXT: ;;#ASMEND 5581; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 5582; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 5583; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 5584; GFX90A-NEXT: ;;#ASMSTART 5585; GFX90A-NEXT: ; use s8 5586; GFX90A-NEXT: ;;#ASMEND 5587; GFX90A-NEXT: s_setpc_b64 s[30:31] 5588; 5589; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_3: 5590; GFX940: ; %bb.0: 5591; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5592; GFX940-NEXT: ;;#ASMSTART 5593; GFX940-NEXT: ; def s[0:1] 5594; GFX940-NEXT: ;;#ASMEND 5595; GFX940-NEXT: s_lshr_b32 s1, s1, 16 5596; GFX940-NEXT: s_lshr_b32 s0, s0, 16 5597; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 5598; GFX940-NEXT: ;;#ASMSTART 5599; GFX940-NEXT: ; use s8 5600; GFX940-NEXT: ;;#ASMEND 5601; GFX940-NEXT: s_setpc_b64 s[30:31] 5602 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5603 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x 
i32> <i32 1, i32 3> 5604 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5605 ret void 5606} 5607 5608define void @s_shuffle_v2bf16_v4bf16__2_3() { 5609; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_3: 5610; GFX900: ; %bb.0: 5611; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5612; GFX900-NEXT: ;;#ASMSTART 5613; GFX900-NEXT: ; def s[4:5] 5614; GFX900-NEXT: ;;#ASMEND 5615; GFX900-NEXT: s_mov_b32 s8, s5 5616; GFX900-NEXT: ;;#ASMSTART 5617; GFX900-NEXT: ; use s8 5618; GFX900-NEXT: ;;#ASMEND 5619; GFX900-NEXT: s_setpc_b64 s[30:31] 5620; 5621; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_3: 5622; GFX90A: ; %bb.0: 5623; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5624; GFX90A-NEXT: ;;#ASMSTART 5625; GFX90A-NEXT: ; def s[4:5] 5626; GFX90A-NEXT: ;;#ASMEND 5627; GFX90A-NEXT: s_mov_b32 s8, s5 5628; GFX90A-NEXT: ;;#ASMSTART 5629; GFX90A-NEXT: ; use s8 5630; GFX90A-NEXT: ;;#ASMEND 5631; GFX90A-NEXT: s_setpc_b64 s[30:31] 5632; 5633; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_3: 5634; GFX940: ; %bb.0: 5635; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5636; GFX940-NEXT: ;;#ASMSTART 5637; GFX940-NEXT: ; def s[0:1] 5638; GFX940-NEXT: ;;#ASMEND 5639; GFX940-NEXT: s_mov_b32 s8, s1 5640; GFX940-NEXT: ;;#ASMSTART 5641; GFX940-NEXT: ; use s8 5642; GFX940-NEXT: ;;#ASMEND 5643; GFX940-NEXT: s_setpc_b64 s[30:31] 5644 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5645 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 3> 5646 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5647 ret void 5648} 5649 5650define void @s_shuffle_v2bf16_v4bf16__3_3() { 5651; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_3: 5652; GFX900: ; %bb.0: 5653; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5654; GFX900-NEXT: ;;#ASMSTART 5655; GFX900-NEXT: ; def s[4:5] 5656; GFX900-NEXT: ;;#ASMEND 5657; GFX900-NEXT: s_lshr_b32 s4, s5, 16 5658; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 5659; GFX900-NEXT: ;;#ASMSTART 5660; GFX900-NEXT: ; use 
s8 5661; GFX900-NEXT: ;;#ASMEND 5662; GFX900-NEXT: s_setpc_b64 s[30:31] 5663; 5664; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_3: 5665; GFX90A: ; %bb.0: 5666; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5667; GFX90A-NEXT: ;;#ASMSTART 5668; GFX90A-NEXT: ; def s[4:5] 5669; GFX90A-NEXT: ;;#ASMEND 5670; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 5671; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 5672; GFX90A-NEXT: ;;#ASMSTART 5673; GFX90A-NEXT: ; use s8 5674; GFX90A-NEXT: ;;#ASMEND 5675; GFX90A-NEXT: s_setpc_b64 s[30:31] 5676; 5677; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_3: 5678; GFX940: ; %bb.0: 5679; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5680; GFX940-NEXT: ;;#ASMSTART 5681; GFX940-NEXT: ; def s[0:1] 5682; GFX940-NEXT: ;;#ASMEND 5683; GFX940-NEXT: s_lshr_b32 s0, s1, 16 5684; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 5685; GFX940-NEXT: ;;#ASMSTART 5686; GFX940-NEXT: ; use s8 5687; GFX940-NEXT: ;;#ASMEND 5688; GFX940-NEXT: s_setpc_b64 s[30:31] 5689 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5690 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 3> 5691 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5692 ret void 5693} 5694 5695define void @s_shuffle_v2bf16_v4bf16__4_3() { 5696; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_3: 5697; GFX900: ; %bb.0: 5698; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5699; GFX900-NEXT: ;;#ASMSTART 5700; GFX900-NEXT: ; def s[4:5] 5701; GFX900-NEXT: ;;#ASMEND 5702; GFX900-NEXT: s_mov_b32 s8, s5 5703; GFX900-NEXT: ;;#ASMSTART 5704; GFX900-NEXT: ; use s8 5705; GFX900-NEXT: ;;#ASMEND 5706; GFX900-NEXT: s_setpc_b64 s[30:31] 5707; 5708; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_3: 5709; GFX90A: ; %bb.0: 5710; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5711; GFX90A-NEXT: ;;#ASMSTART 5712; GFX90A-NEXT: ; def s[4:5] 5713; GFX90A-NEXT: ;;#ASMEND 5714; GFX90A-NEXT: s_mov_b32 s8, s5 5715; GFX90A-NEXT: ;;#ASMSTART 5716; GFX90A-NEXT: ; use s8 5717; GFX90A-NEXT: ;;#ASMEND 
5718; GFX90A-NEXT: s_setpc_b64 s[30:31] 5719; 5720; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_3: 5721; GFX940: ; %bb.0: 5722; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5723; GFX940-NEXT: ;;#ASMSTART 5724; GFX940-NEXT: ; def s[0:1] 5725; GFX940-NEXT: ;;#ASMEND 5726; GFX940-NEXT: s_mov_b32 s8, s1 5727; GFX940-NEXT: ;;#ASMSTART 5728; GFX940-NEXT: ; use s8 5729; GFX940-NEXT: ;;#ASMEND 5730; GFX940-NEXT: s_setpc_b64 s[30:31] 5731 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5732 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 3> 5733 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5734 ret void 5735} 5736 5737define void @s_shuffle_v2bf16_v4bf16__5_3() { 5738; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_3: 5739; GFX900: ; %bb.0: 5740; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5741; GFX900-NEXT: ;;#ASMSTART 5742; GFX900-NEXT: ; def s[4:5] 5743; GFX900-NEXT: ;;#ASMEND 5744; GFX900-NEXT: ;;#ASMSTART 5745; GFX900-NEXT: ; def s[6:7] 5746; GFX900-NEXT: ;;#ASMEND 5747; GFX900-NEXT: s_lshr_b32 s4, s5, 16 5748; GFX900-NEXT: s_lshr_b32 s5, s6, 16 5749; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5750; GFX900-NEXT: ;;#ASMSTART 5751; GFX900-NEXT: ; use s8 5752; GFX900-NEXT: ;;#ASMEND 5753; GFX900-NEXT: s_setpc_b64 s[30:31] 5754; 5755; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_3: 5756; GFX90A: ; %bb.0: 5757; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5758; GFX90A-NEXT: ;;#ASMSTART 5759; GFX90A-NEXT: ; def s[4:5] 5760; GFX90A-NEXT: ;;#ASMEND 5761; GFX90A-NEXT: ;;#ASMSTART 5762; GFX90A-NEXT: ; def s[6:7] 5763; GFX90A-NEXT: ;;#ASMEND 5764; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 5765; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 5766; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 5767; GFX90A-NEXT: ;;#ASMSTART 5768; GFX90A-NEXT: ; use s8 5769; GFX90A-NEXT: ;;#ASMEND 5770; GFX90A-NEXT: s_setpc_b64 s[30:31] 5771; 5772; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_3: 5773; GFX940: ; %bb.0: 5774; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5775; GFX940-NEXT: ;;#ASMSTART 5776; GFX940-NEXT: ; def s[0:1] 5777; GFX940-NEXT: ;;#ASMEND 5778; GFX940-NEXT: ;;#ASMSTART 5779; GFX940-NEXT: ; def s[2:3] 5780; GFX940-NEXT: ;;#ASMEND 5781; GFX940-NEXT: s_lshr_b32 s0, s1, 16 5782; GFX940-NEXT: s_lshr_b32 s1, s2, 16 5783; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 5784; GFX940-NEXT: ;;#ASMSTART 5785; GFX940-NEXT: ; use s8 5786; GFX940-NEXT: ;;#ASMEND 5787; GFX940-NEXT: s_setpc_b64 s[30:31] 5788 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5789 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 5790 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 3> 5791 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5792 ret void 5793} 5794 5795define void @s_shuffle_v2bf16_v4bf16__6_3() { 5796; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_3: 5797; GFX900: ; %bb.0: 5798; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5799; GFX900-NEXT: ;;#ASMSTART 5800; GFX900-NEXT: ; def s[4:5] 5801; GFX900-NEXT: ;;#ASMEND 5802; GFX900-NEXT: s_lshr_b32 s4, s5, 16 5803; GFX900-NEXT: ;;#ASMSTART 5804; GFX900-NEXT: ; def s[6:7] 5805; GFX900-NEXT: ;;#ASMEND 5806; GFX900-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5807; GFX900-NEXT: ;;#ASMSTART 5808; GFX900-NEXT: ; use s8 5809; GFX900-NEXT: ;;#ASMEND 5810; GFX900-NEXT: s_setpc_b64 s[30:31] 5811; 5812; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_3: 5813; GFX90A: ; %bb.0: 5814; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5815; GFX90A-NEXT: ;;#ASMSTART 5816; GFX90A-NEXT: ; def s[4:5] 5817; GFX90A-NEXT: ;;#ASMEND 5818; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 5819; GFX90A-NEXT: ;;#ASMSTART 5820; GFX90A-NEXT: ; def s[6:7] 5821; GFX90A-NEXT: ;;#ASMEND 5822; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s7, s4 5823; GFX90A-NEXT: ;;#ASMSTART 5824; GFX90A-NEXT: ; use s8 5825; GFX90A-NEXT: ;;#ASMEND 5826; GFX90A-NEXT: s_setpc_b64 s[30:31] 5827; 5828; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_3: 5829; GFX940: ; %bb.0: 5830; GFX940-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) 5831; GFX940-NEXT: ;;#ASMSTART 5832; GFX940-NEXT: ; def s[0:1] 5833; GFX940-NEXT: ;;#ASMEND 5834; GFX940-NEXT: s_lshr_b32 s0, s1, 16 5835; GFX940-NEXT: ;;#ASMSTART 5836; GFX940-NEXT: ; def s[2:3] 5837; GFX940-NEXT: ;;#ASMEND 5838; GFX940-NEXT: s_pack_ll_b32_b16 s8, s3, s0 5839; GFX940-NEXT: ;;#ASMSTART 5840; GFX940-NEXT: ; use s8 5841; GFX940-NEXT: ;;#ASMEND 5842; GFX940-NEXT: s_setpc_b64 s[30:31] 5843 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5844 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 5845 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 3> 5846 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5847 ret void 5848} 5849 5850define void @s_shuffle_v2bf16_v4bf16__u_4() { 5851; GFX9-LABEL: s_shuffle_v2bf16_v4bf16__u_4: 5852; GFX9: ; %bb.0: 5853; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5854; GFX9-NEXT: ;;#ASMSTART 5855; GFX9-NEXT: ; use s8 5856; GFX9-NEXT: ;;#ASMEND 5857; GFX9-NEXT: s_setpc_b64 s[30:31] 5858 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5859 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 poison, i32 4> 5860 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5861 ret void 5862} 5863 5864define void @s_shuffle_v2bf16_v4bf16__0_4() { 5865; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_4: 5866; GFX900: ; %bb.0: 5867; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5868; GFX900-NEXT: ;;#ASMSTART 5869; GFX900-NEXT: ; def s[8:9] 5870; GFX900-NEXT: ;;#ASMEND 5871; GFX900-NEXT: ;;#ASMSTART 5872; GFX900-NEXT: ; use s8 5873; GFX900-NEXT: ;;#ASMEND 5874; GFX900-NEXT: s_setpc_b64 s[30:31] 5875; 5876; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_4: 5877; GFX90A: ; %bb.0: 5878; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5879; GFX90A-NEXT: ;;#ASMSTART 5880; GFX90A-NEXT: ; def s[8:9] 5881; GFX90A-NEXT: ;;#ASMEND 5882; GFX90A-NEXT: ;;#ASMSTART 5883; GFX90A-NEXT: ; use s8 5884; GFX90A-NEXT: ;;#ASMEND 5885; GFX90A-NEXT: 
s_setpc_b64 s[30:31] 5886; 5887; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_4: 5888; GFX940: ; %bb.0: 5889; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5890; GFX940-NEXT: ;;#ASMSTART 5891; GFX940-NEXT: ; def s[8:9] 5892; GFX940-NEXT: ;;#ASMEND 5893; GFX940-NEXT: s_nop 0 5894; GFX940-NEXT: ;;#ASMSTART 5895; GFX940-NEXT: ; use s8 5896; GFX940-NEXT: ;;#ASMEND 5897; GFX940-NEXT: s_setpc_b64 s[30:31] 5898 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5899 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 0, i32 4> 5900 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5901 ret void 5902} 5903 5904define void @s_shuffle_v2bf16_v4bf16__1_4() { 5905; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_4: 5906; GFX900: ; %bb.0: 5907; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5908; GFX900-NEXT: ;;#ASMSTART 5909; GFX900-NEXT: ; def s[4:5] 5910; GFX900-NEXT: ;;#ASMEND 5911; GFX900-NEXT: s_lshr_b32 s8, s4, 16 5912; GFX900-NEXT: ;;#ASMSTART 5913; GFX900-NEXT: ; use s8 5914; GFX900-NEXT: ;;#ASMEND 5915; GFX900-NEXT: s_setpc_b64 s[30:31] 5916; 5917; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_4: 5918; GFX90A: ; %bb.0: 5919; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5920; GFX90A-NEXT: ;;#ASMSTART 5921; GFX90A-NEXT: ; def s[4:5] 5922; GFX90A-NEXT: ;;#ASMEND 5923; GFX90A-NEXT: s_lshr_b32 s8, s4, 16 5924; GFX90A-NEXT: ;;#ASMSTART 5925; GFX90A-NEXT: ; use s8 5926; GFX90A-NEXT: ;;#ASMEND 5927; GFX90A-NEXT: s_setpc_b64 s[30:31] 5928; 5929; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_4: 5930; GFX940: ; %bb.0: 5931; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5932; GFX940-NEXT: ;;#ASMSTART 5933; GFX940-NEXT: ; def s[0:1] 5934; GFX940-NEXT: ;;#ASMEND 5935; GFX940-NEXT: s_lshr_b32 s8, s0, 16 5936; GFX940-NEXT: ;;#ASMSTART 5937; GFX940-NEXT: ; use s8 5938; GFX940-NEXT: ;;#ASMEND 5939; GFX940-NEXT: s_setpc_b64 s[30:31] 5940 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5941 %shuf = shufflevector <4 x bfloat> %vec0, <4 x 
bfloat> poison, <2 x i32> <i32 1, i32 4> 5942 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5943 ret void 5944} 5945 5946define void @s_shuffle_v2bf16_v4bf16__2_4() { 5947; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_4: 5948; GFX900: ; %bb.0: 5949; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5950; GFX900-NEXT: ;;#ASMSTART 5951; GFX900-NEXT: ; def s[4:5] 5952; GFX900-NEXT: ;;#ASMEND 5953; GFX900-NEXT: s_mov_b32 s8, s5 5954; GFX900-NEXT: ;;#ASMSTART 5955; GFX900-NEXT: ; use s8 5956; GFX900-NEXT: ;;#ASMEND 5957; GFX900-NEXT: s_setpc_b64 s[30:31] 5958; 5959; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_4: 5960; GFX90A: ; %bb.0: 5961; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5962; GFX90A-NEXT: ;;#ASMSTART 5963; GFX90A-NEXT: ; def s[4:5] 5964; GFX90A-NEXT: ;;#ASMEND 5965; GFX90A-NEXT: s_mov_b32 s8, s5 5966; GFX90A-NEXT: ;;#ASMSTART 5967; GFX90A-NEXT: ; use s8 5968; GFX90A-NEXT: ;;#ASMEND 5969; GFX90A-NEXT: s_setpc_b64 s[30:31] 5970; 5971; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_4: 5972; GFX940: ; %bb.0: 5973; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5974; GFX940-NEXT: ;;#ASMSTART 5975; GFX940-NEXT: ; def s[0:1] 5976; GFX940-NEXT: ;;#ASMEND 5977; GFX940-NEXT: s_mov_b32 s8, s1 5978; GFX940-NEXT: ;;#ASMSTART 5979; GFX940-NEXT: ; use s8 5980; GFX940-NEXT: ;;#ASMEND 5981; GFX940-NEXT: s_setpc_b64 s[30:31] 5982 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 5983 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 2, i32 4> 5984 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 5985 ret void 5986} 5987 5988define void @s_shuffle_v2bf16_v4bf16__3_4() { 5989; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_4: 5990; GFX900: ; %bb.0: 5991; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 5992; GFX900-NEXT: ;;#ASMSTART 5993; GFX900-NEXT: ; def s[4:5] 5994; GFX900-NEXT: ;;#ASMEND 5995; GFX900-NEXT: s_lshr_b32 s8, s5, 16 5996; GFX900-NEXT: ;;#ASMSTART 5997; GFX900-NEXT: ; use s8 5998; GFX900-NEXT: 
;;#ASMEND 5999; GFX900-NEXT: s_setpc_b64 s[30:31] 6000; 6001; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_4: 6002; GFX90A: ; %bb.0: 6003; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6004; GFX90A-NEXT: ;;#ASMSTART 6005; GFX90A-NEXT: ; def s[4:5] 6006; GFX90A-NEXT: ;;#ASMEND 6007; GFX90A-NEXT: s_lshr_b32 s8, s5, 16 6008; GFX90A-NEXT: ;;#ASMSTART 6009; GFX90A-NEXT: ; use s8 6010; GFX90A-NEXT: ;;#ASMEND 6011; GFX90A-NEXT: s_setpc_b64 s[30:31] 6012; 6013; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_4: 6014; GFX940: ; %bb.0: 6015; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6016; GFX940-NEXT: ;;#ASMSTART 6017; GFX940-NEXT: ; def s[0:1] 6018; GFX940-NEXT: ;;#ASMEND 6019; GFX940-NEXT: s_lshr_b32 s8, s1, 16 6020; GFX940-NEXT: ;;#ASMSTART 6021; GFX940-NEXT: ; use s8 6022; GFX940-NEXT: ;;#ASMEND 6023; GFX940-NEXT: s_setpc_b64 s[30:31] 6024 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6025 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 3, i32 4> 6026 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6027 ret void 6028} 6029 6030define void @s_shuffle_v2bf16_v4bf16__4_4() { 6031; GFX9-LABEL: s_shuffle_v2bf16_v4bf16__4_4: 6032; GFX9: ; %bb.0: 6033; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6034; GFX9-NEXT: ;;#ASMSTART 6035; GFX9-NEXT: ; use s8 6036; GFX9-NEXT: ;;#ASMEND 6037; GFX9-NEXT: s_setpc_b64 s[30:31] 6038 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6039 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> poison, <2 x i32> <i32 4, i32 4> 6040 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6041 ret void 6042} 6043 6044define void @s_shuffle_v2bf16_v4bf16__5_4() { 6045; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_4: 6046; GFX900: ; %bb.0: 6047; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6048; GFX900-NEXT: ;;#ASMSTART 6049; GFX900-NEXT: ; def s[4:5] 6050; GFX900-NEXT: ;;#ASMEND 6051; GFX900-NEXT: s_lshr_b32 s5, s4, 16 6052; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6053; 
GFX900-NEXT: ;;#ASMSTART 6054; GFX900-NEXT: ; use s8 6055; GFX900-NEXT: ;;#ASMEND 6056; GFX900-NEXT: s_setpc_b64 s[30:31] 6057; 6058; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_4: 6059; GFX90A: ; %bb.0: 6060; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6061; GFX90A-NEXT: ;;#ASMSTART 6062; GFX90A-NEXT: ; def s[4:5] 6063; GFX90A-NEXT: ;;#ASMEND 6064; GFX90A-NEXT: s_lshr_b32 s5, s4, 16 6065; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6066; GFX90A-NEXT: ;;#ASMSTART 6067; GFX90A-NEXT: ; use s8 6068; GFX90A-NEXT: ;;#ASMEND 6069; GFX90A-NEXT: s_setpc_b64 s[30:31] 6070; 6071; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_4: 6072; GFX940: ; %bb.0: 6073; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6074; GFX940-NEXT: ;;#ASMSTART 6075; GFX940-NEXT: ; def s[0:1] 6076; GFX940-NEXT: ;;#ASMEND 6077; GFX940-NEXT: s_lshr_b32 s1, s0, 16 6078; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 6079; GFX940-NEXT: ;;#ASMSTART 6080; GFX940-NEXT: ; use s8 6081; GFX940-NEXT: ;;#ASMEND 6082; GFX940-NEXT: s_setpc_b64 s[30:31] 6083 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6084 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6085 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 4> 6086 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6087 ret void 6088} 6089 6090define void @s_shuffle_v2bf16_v4bf16__6_4() { 6091; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_4: 6092; GFX900: ; %bb.0: 6093; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6094; GFX900-NEXT: ;;#ASMSTART 6095; GFX900-NEXT: ; def s[4:5] 6096; GFX900-NEXT: ;;#ASMEND 6097; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6098; GFX900-NEXT: ;;#ASMSTART 6099; GFX900-NEXT: ; use s8 6100; GFX900-NEXT: ;;#ASMEND 6101; GFX900-NEXT: s_setpc_b64 s[30:31] 6102; 6103; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_4: 6104; GFX90A: ; %bb.0: 6105; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6106; GFX90A-NEXT: ;;#ASMSTART 6107; GFX90A-NEXT: ; def s[4:5] 6108; GFX90A-NEXT: ;;#ASMEND 6109; 
GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6110; GFX90A-NEXT: ;;#ASMSTART 6111; GFX90A-NEXT: ; use s8 6112; GFX90A-NEXT: ;;#ASMEND 6113; GFX90A-NEXT: s_setpc_b64 s[30:31] 6114; 6115; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_4: 6116; GFX940: ; %bb.0: 6117; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6118; GFX940-NEXT: ;;#ASMSTART 6119; GFX940-NEXT: ; def s[0:1] 6120; GFX940-NEXT: ;;#ASMEND 6121; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 6122; GFX940-NEXT: ;;#ASMSTART 6123; GFX940-NEXT: ; use s8 6124; GFX940-NEXT: ;;#ASMEND 6125; GFX940-NEXT: s_setpc_b64 s[30:31] 6126 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6127 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6128 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 4> 6129 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6130 ret void 6131} 6132 6133define void @s_shuffle_v2bf16_v4bf16__u_5() { 6134; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_5: 6135; GFX900: ; %bb.0: 6136; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6137; GFX900-NEXT: ;;#ASMSTART 6138; GFX900-NEXT: ; def s[8:9] 6139; GFX900-NEXT: ;;#ASMEND 6140; GFX900-NEXT: ;;#ASMSTART 6141; GFX900-NEXT: ; use s8 6142; GFX900-NEXT: ;;#ASMEND 6143; GFX900-NEXT: s_setpc_b64 s[30:31] 6144; 6145; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_5: 6146; GFX90A: ; %bb.0: 6147; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6148; GFX90A-NEXT: ;;#ASMSTART 6149; GFX90A-NEXT: ; def s[8:9] 6150; GFX90A-NEXT: ;;#ASMEND 6151; GFX90A-NEXT: ;;#ASMSTART 6152; GFX90A-NEXT: ; use s8 6153; GFX90A-NEXT: ;;#ASMEND 6154; GFX90A-NEXT: s_setpc_b64 s[30:31] 6155; 6156; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_5: 6157; GFX940: ; %bb.0: 6158; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6159; GFX940-NEXT: ;;#ASMSTART 6160; GFX940-NEXT: ; def s[8:9] 6161; GFX940-NEXT: ;;#ASMEND 6162; GFX940-NEXT: s_nop 0 6163; GFX940-NEXT: ;;#ASMSTART 6164; GFX940-NEXT: ; use s8 6165; GFX940-NEXT: ;;#ASMEND 6166; GFX940-NEXT: 
s_setpc_b64 s[30:31] 6167 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6168 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6169 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 5> 6170 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6171 ret void 6172} 6173 6174define void @s_shuffle_v2bf16_v4bf16__0_5() { 6175; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_5: 6176; GFX900: ; %bb.0: 6177; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6178; GFX900-NEXT: ;;#ASMSTART 6179; GFX900-NEXT: ; def s[4:5] 6180; GFX900-NEXT: ;;#ASMEND 6181; GFX900-NEXT: ;;#ASMSTART 6182; GFX900-NEXT: ; def s[6:7] 6183; GFX900-NEXT: ;;#ASMEND 6184; GFX900-NEXT: s_lshr_b32 s5, s6, 16 6185; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6186; GFX900-NEXT: ;;#ASMSTART 6187; GFX900-NEXT: ; use s8 6188; GFX900-NEXT: ;;#ASMEND 6189; GFX900-NEXT: s_setpc_b64 s[30:31] 6190; 6191; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_5: 6192; GFX90A: ; %bb.0: 6193; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6194; GFX90A-NEXT: ;;#ASMSTART 6195; GFX90A-NEXT: ; def s[4:5] 6196; GFX90A-NEXT: ;;#ASMEND 6197; GFX90A-NEXT: ;;#ASMSTART 6198; GFX90A-NEXT: ; def s[6:7] 6199; GFX90A-NEXT: ;;#ASMEND 6200; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 6201; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6202; GFX90A-NEXT: ;;#ASMSTART 6203; GFX90A-NEXT: ; use s8 6204; GFX90A-NEXT: ;;#ASMEND 6205; GFX90A-NEXT: s_setpc_b64 s[30:31] 6206; 6207; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_5: 6208; GFX940: ; %bb.0: 6209; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6210; GFX940-NEXT: ;;#ASMSTART 6211; GFX940-NEXT: ; def s[0:1] 6212; GFX940-NEXT: ;;#ASMEND 6213; GFX940-NEXT: ;;#ASMSTART 6214; GFX940-NEXT: ; def s[2:3] 6215; GFX940-NEXT: ;;#ASMEND 6216; GFX940-NEXT: s_lshr_b32 s1, s2, 16 6217; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 6218; GFX940-NEXT: ;;#ASMSTART 6219; GFX940-NEXT: ; use s8 6220; GFX940-NEXT: ;;#ASMEND 6221; GFX940-NEXT: s_setpc_b64 s[30:31] 6222 %vec0 = call 
<4 x bfloat> asm "; def $0", "=s"() 6223 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6224 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 5> 6225 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6226 ret void 6227} 6228 6229define void @s_shuffle_v2bf16_v4bf16__1_5() { 6230; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_5: 6231; GFX900: ; %bb.0: 6232; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6233; GFX900-NEXT: ;;#ASMSTART 6234; GFX900-NEXT: ; def s[4:5] 6235; GFX900-NEXT: ;;#ASMEND 6236; GFX900-NEXT: ;;#ASMSTART 6237; GFX900-NEXT: ; def s[6:7] 6238; GFX900-NEXT: ;;#ASMEND 6239; GFX900-NEXT: s_lshr_b32 s5, s6, 16 6240; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6241; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6242; GFX900-NEXT: ;;#ASMSTART 6243; GFX900-NEXT: ; use s8 6244; GFX900-NEXT: ;;#ASMEND 6245; GFX900-NEXT: s_setpc_b64 s[30:31] 6246; 6247; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_5: 6248; GFX90A: ; %bb.0: 6249; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6250; GFX90A-NEXT: ;;#ASMSTART 6251; GFX90A-NEXT: ; def s[4:5] 6252; GFX90A-NEXT: ;;#ASMEND 6253; GFX90A-NEXT: ;;#ASMSTART 6254; GFX90A-NEXT: ; def s[6:7] 6255; GFX90A-NEXT: ;;#ASMEND 6256; GFX90A-NEXT: s_lshr_b32 s5, s6, 16 6257; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6258; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6259; GFX90A-NEXT: ;;#ASMSTART 6260; GFX90A-NEXT: ; use s8 6261; GFX90A-NEXT: ;;#ASMEND 6262; GFX90A-NEXT: s_setpc_b64 s[30:31] 6263; 6264; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_5: 6265; GFX940: ; %bb.0: 6266; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6267; GFX940-NEXT: ;;#ASMSTART 6268; GFX940-NEXT: ; def s[0:1] 6269; GFX940-NEXT: ;;#ASMEND 6270; GFX940-NEXT: ;;#ASMSTART 6271; GFX940-NEXT: ; def s[2:3] 6272; GFX940-NEXT: ;;#ASMEND 6273; GFX940-NEXT: s_lshr_b32 s1, s2, 16 6274; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6275; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 6276; GFX940-NEXT: ;;#ASMSTART 6277; GFX940-NEXT: ; use s8 6278; 
GFX940-NEXT: ;;#ASMEND 6279; GFX940-NEXT: s_setpc_b64 s[30:31] 6280 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6281 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6282 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 5> 6283 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6284 ret void 6285} 6286 6287define void @s_shuffle_v2bf16_v4bf16__2_5() { 6288; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_5: 6289; GFX900: ; %bb.0: 6290; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6291; GFX900-NEXT: ;;#ASMSTART 6292; GFX900-NEXT: ; def s[4:5] 6293; GFX900-NEXT: ;;#ASMEND 6294; GFX900-NEXT: ;;#ASMSTART 6295; GFX900-NEXT: ; def s[6:7] 6296; GFX900-NEXT: ;;#ASMEND 6297; GFX900-NEXT: s_lshr_b32 s4, s6, 16 6298; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6299; GFX900-NEXT: ;;#ASMSTART 6300; GFX900-NEXT: ; use s8 6301; GFX900-NEXT: ;;#ASMEND 6302; GFX900-NEXT: s_setpc_b64 s[30:31] 6303; 6304; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_5: 6305; GFX90A: ; %bb.0: 6306; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6307; GFX90A-NEXT: ;;#ASMSTART 6308; GFX90A-NEXT: ; def s[4:5] 6309; GFX90A-NEXT: ;;#ASMEND 6310; GFX90A-NEXT: ;;#ASMSTART 6311; GFX90A-NEXT: ; def s[6:7] 6312; GFX90A-NEXT: ;;#ASMEND 6313; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 6314; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6315; GFX90A-NEXT: ;;#ASMSTART 6316; GFX90A-NEXT: ; use s8 6317; GFX90A-NEXT: ;;#ASMEND 6318; GFX90A-NEXT: s_setpc_b64 s[30:31] 6319; 6320; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_5: 6321; GFX940: ; %bb.0: 6322; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6323; GFX940-NEXT: ;;#ASMSTART 6324; GFX940-NEXT: ; def s[0:1] 6325; GFX940-NEXT: ;;#ASMEND 6326; GFX940-NEXT: ;;#ASMSTART 6327; GFX940-NEXT: ; def s[2:3] 6328; GFX940-NEXT: ;;#ASMEND 6329; GFX940-NEXT: s_lshr_b32 s0, s2, 16 6330; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 6331; GFX940-NEXT: ;;#ASMSTART 6332; GFX940-NEXT: ; use s8 6333; GFX940-NEXT: ;;#ASMEND 6334; GFX940-NEXT: 
s_setpc_b64 s[30:31] 6335 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6336 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6337 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 5> 6338 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6339 ret void 6340} 6341 6342define void @s_shuffle_v2bf16_v4bf16__3_5() { 6343; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_5: 6344; GFX900: ; %bb.0: 6345; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6346; GFX900-NEXT: ;;#ASMSTART 6347; GFX900-NEXT: ; def s[4:5] 6348; GFX900-NEXT: ;;#ASMEND 6349; GFX900-NEXT: ;;#ASMSTART 6350; GFX900-NEXT: ; def s[6:7] 6351; GFX900-NEXT: ;;#ASMEND 6352; GFX900-NEXT: s_lshr_b32 s4, s6, 16 6353; GFX900-NEXT: s_lshr_b32 s5, s5, 16 6354; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6355; GFX900-NEXT: ;;#ASMSTART 6356; GFX900-NEXT: ; use s8 6357; GFX900-NEXT: ;;#ASMEND 6358; GFX900-NEXT: s_setpc_b64 s[30:31] 6359; 6360; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_5: 6361; GFX90A: ; %bb.0: 6362; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6363; GFX90A-NEXT: ;;#ASMSTART 6364; GFX90A-NEXT: ; def s[4:5] 6365; GFX90A-NEXT: ;;#ASMEND 6366; GFX90A-NEXT: ;;#ASMSTART 6367; GFX90A-NEXT: ; def s[6:7] 6368; GFX90A-NEXT: ;;#ASMEND 6369; GFX90A-NEXT: s_lshr_b32 s4, s6, 16 6370; GFX90A-NEXT: s_lshr_b32 s5, s5, 16 6371; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6372; GFX90A-NEXT: ;;#ASMSTART 6373; GFX90A-NEXT: ; use s8 6374; GFX90A-NEXT: ;;#ASMEND 6375; GFX90A-NEXT: s_setpc_b64 s[30:31] 6376; 6377; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_5: 6378; GFX940: ; %bb.0: 6379; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6380; GFX940-NEXT: ;;#ASMSTART 6381; GFX940-NEXT: ; def s[0:1] 6382; GFX940-NEXT: ;;#ASMEND 6383; GFX940-NEXT: ;;#ASMSTART 6384; GFX940-NEXT: ; def s[2:3] 6385; GFX940-NEXT: ;;#ASMEND 6386; GFX940-NEXT: s_lshr_b32 s0, s2, 16 6387; GFX940-NEXT: s_lshr_b32 s1, s1, 16 6388; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 6389; GFX940-NEXT: 
;;#ASMSTART 6390; GFX940-NEXT: ; use s8 6391; GFX940-NEXT: ;;#ASMEND 6392; GFX940-NEXT: s_setpc_b64 s[30:31] 6393 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6394 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6395 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 5> 6396 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6397 ret void 6398} 6399 6400define void @s_shuffle_v2bf16_v4bf16__4_5() { 6401; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_5: 6402; GFX900: ; %bb.0: 6403; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6404; GFX900-NEXT: ;;#ASMSTART 6405; GFX900-NEXT: ; def s[8:9] 6406; GFX900-NEXT: ;;#ASMEND 6407; GFX900-NEXT: ;;#ASMSTART 6408; GFX900-NEXT: ; use s8 6409; GFX900-NEXT: ;;#ASMEND 6410; GFX900-NEXT: s_setpc_b64 s[30:31] 6411; 6412; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_5: 6413; GFX90A: ; %bb.0: 6414; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6415; GFX90A-NEXT: ;;#ASMSTART 6416; GFX90A-NEXT: ; def s[8:9] 6417; GFX90A-NEXT: ;;#ASMEND 6418; GFX90A-NEXT: ;;#ASMSTART 6419; GFX90A-NEXT: ; use s8 6420; GFX90A-NEXT: ;;#ASMEND 6421; GFX90A-NEXT: s_setpc_b64 s[30:31] 6422; 6423; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_5: 6424; GFX940: ; %bb.0: 6425; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6426; GFX940-NEXT: ;;#ASMSTART 6427; GFX940-NEXT: ; def s[8:9] 6428; GFX940-NEXT: ;;#ASMEND 6429; GFX940-NEXT: s_nop 0 6430; GFX940-NEXT: ;;#ASMSTART 6431; GFX940-NEXT: ; use s8 6432; GFX940-NEXT: ;;#ASMEND 6433; GFX940-NEXT: s_setpc_b64 s[30:31] 6434 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6435 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6436 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 5> 6437 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6438 ret void 6439} 6440 6441define void @s_shuffle_v2bf16_v4bf16__5_5() { 6442; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_5: 6443; GFX900: ; %bb.0: 6444; GFX900-NEXT: s_waitcnt 
vmcnt(0) expcnt(0) lgkmcnt(0) 6445; GFX900-NEXT: ;;#ASMSTART 6446; GFX900-NEXT: ; def s[4:5] 6447; GFX900-NEXT: ;;#ASMEND 6448; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6449; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s4 6450; GFX900-NEXT: ;;#ASMSTART 6451; GFX900-NEXT: ; use s8 6452; GFX900-NEXT: ;;#ASMEND 6453; GFX900-NEXT: s_setpc_b64 s[30:31] 6454; 6455; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_5: 6456; GFX90A: ; %bb.0: 6457; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6458; GFX90A-NEXT: ;;#ASMSTART 6459; GFX90A-NEXT: ; def s[4:5] 6460; GFX90A-NEXT: ;;#ASMEND 6461; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6462; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s4 6463; GFX90A-NEXT: ;;#ASMSTART 6464; GFX90A-NEXT: ; use s8 6465; GFX90A-NEXT: ;;#ASMEND 6466; GFX90A-NEXT: s_setpc_b64 s[30:31] 6467; 6468; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_5: 6469; GFX940: ; %bb.0: 6470; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6471; GFX940-NEXT: ;;#ASMSTART 6472; GFX940-NEXT: ; def s[0:1] 6473; GFX940-NEXT: ;;#ASMEND 6474; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6475; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s0 6476; GFX940-NEXT: ;;#ASMSTART 6477; GFX940-NEXT: ; use s8 6478; GFX940-NEXT: ;;#ASMEND 6479; GFX940-NEXT: s_setpc_b64 s[30:31] 6480 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6481 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6482 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 5> 6483 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6484 ret void 6485} 6486 6487define void @s_shuffle_v2bf16_v4bf16__6_5() { 6488; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_5: 6489; GFX900: ; %bb.0: 6490; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6491; GFX900-NEXT: ;;#ASMSTART 6492; GFX900-NEXT: ; def s[4:5] 6493; GFX900-NEXT: ;;#ASMEND 6494; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6495; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6496; GFX900-NEXT: ;;#ASMSTART 6497; GFX900-NEXT: ; use s8 6498; GFX900-NEXT: ;;#ASMEND 6499; GFX900-NEXT: 
s_setpc_b64 s[30:31] 6500; 6501; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_5: 6502; GFX90A: ; %bb.0: 6503; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6504; GFX90A-NEXT: ;;#ASMSTART 6505; GFX90A-NEXT: ; def s[4:5] 6506; GFX90A-NEXT: ;;#ASMEND 6507; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6508; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 6509; GFX90A-NEXT: ;;#ASMSTART 6510; GFX90A-NEXT: ; use s8 6511; GFX90A-NEXT: ;;#ASMEND 6512; GFX90A-NEXT: s_setpc_b64 s[30:31] 6513; 6514; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_5: 6515; GFX940: ; %bb.0: 6516; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6517; GFX940-NEXT: ;;#ASMSTART 6518; GFX940-NEXT: ; def s[0:1] 6519; GFX940-NEXT: ;;#ASMEND 6520; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6521; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0 6522; GFX940-NEXT: ;;#ASMSTART 6523; GFX940-NEXT: ; use s8 6524; GFX940-NEXT: ;;#ASMEND 6525; GFX940-NEXT: s_setpc_b64 s[30:31] 6526 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6527 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6528 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 5> 6529 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6530 ret void 6531} 6532 6533define void @s_shuffle_v2bf16_v4bf16__u_6() { 6534; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_6: 6535; GFX900: ; %bb.0: 6536; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6537; GFX900-NEXT: ;;#ASMSTART 6538; GFX900-NEXT: ; def s[4:5] 6539; GFX900-NEXT: ;;#ASMEND 6540; GFX900-NEXT: s_lshl_b32 s8, s5, 16 6541; GFX900-NEXT: ;;#ASMSTART 6542; GFX900-NEXT: ; use s8 6543; GFX900-NEXT: ;;#ASMEND 6544; GFX900-NEXT: s_setpc_b64 s[30:31] 6545; 6546; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_6: 6547; GFX90A: ; %bb.0: 6548; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6549; GFX90A-NEXT: ;;#ASMSTART 6550; GFX90A-NEXT: ; def s[4:5] 6551; GFX90A-NEXT: ;;#ASMEND 6552; GFX90A-NEXT: s_lshl_b32 s8, s5, 16 6553; GFX90A-NEXT: ;;#ASMSTART 6554; GFX90A-NEXT: ; use s8 6555; 
GFX90A-NEXT: ;;#ASMEND 6556; GFX90A-NEXT: s_setpc_b64 s[30:31] 6557; 6558; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_6: 6559; GFX940: ; %bb.0: 6560; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6561; GFX940-NEXT: ;;#ASMSTART 6562; GFX940-NEXT: ; def s[0:1] 6563; GFX940-NEXT: ;;#ASMEND 6564; GFX940-NEXT: s_lshl_b32 s8, s1, 16 6565; GFX940-NEXT: ;;#ASMSTART 6566; GFX940-NEXT: ; use s8 6567; GFX940-NEXT: ;;#ASMEND 6568; GFX940-NEXT: s_setpc_b64 s[30:31] 6569 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6570 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6571 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 6> 6572 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6573 ret void 6574} 6575 6576define void @s_shuffle_v2bf16_v4bf16__0_6() { 6577; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_6: 6578; GFX900: ; %bb.0: 6579; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6580; GFX900-NEXT: ;;#ASMSTART 6581; GFX900-NEXT: ; def s[4:5] 6582; GFX900-NEXT: ;;#ASMEND 6583; GFX900-NEXT: ;;#ASMSTART 6584; GFX900-NEXT: ; def s[6:7] 6585; GFX900-NEXT: ;;#ASMEND 6586; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6587; GFX900-NEXT: ;;#ASMSTART 6588; GFX900-NEXT: ; use s8 6589; GFX900-NEXT: ;;#ASMEND 6590; GFX900-NEXT: s_setpc_b64 s[30:31] 6591; 6592; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_6: 6593; GFX90A: ; %bb.0: 6594; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6595; GFX90A-NEXT: ;;#ASMSTART 6596; GFX90A-NEXT: ; def s[4:5] 6597; GFX90A-NEXT: ;;#ASMEND 6598; GFX90A-NEXT: ;;#ASMSTART 6599; GFX90A-NEXT: ; def s[6:7] 6600; GFX90A-NEXT: ;;#ASMEND 6601; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6602; GFX90A-NEXT: ;;#ASMSTART 6603; GFX90A-NEXT: ; use s8 6604; GFX90A-NEXT: ;;#ASMEND 6605; GFX90A-NEXT: s_setpc_b64 s[30:31] 6606; 6607; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_6: 6608; GFX940: ; %bb.0: 6609; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6610; GFX940-NEXT: ;;#ASMSTART 6611; GFX940-NEXT: ; def 
s[0:1] 6612; GFX940-NEXT: ;;#ASMEND 6613; GFX940-NEXT: ;;#ASMSTART 6614; GFX940-NEXT: ; def s[2:3] 6615; GFX940-NEXT: ;;#ASMEND 6616; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 6617; GFX940-NEXT: ;;#ASMSTART 6618; GFX940-NEXT: ; use s8 6619; GFX940-NEXT: ;;#ASMEND 6620; GFX940-NEXT: s_setpc_b64 s[30:31] 6621 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6622 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6623 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 6> 6624 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6625 ret void 6626} 6627 6628define void @s_shuffle_v2bf16_v4bf16__1_6() { 6629; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_6: 6630; GFX900: ; %bb.0: 6631; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6632; GFX900-NEXT: ;;#ASMSTART 6633; GFX900-NEXT: ; def s[4:5] 6634; GFX900-NEXT: ;;#ASMEND 6635; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6636; GFX900-NEXT: ;;#ASMSTART 6637; GFX900-NEXT: ; def s[6:7] 6638; GFX900-NEXT: ;;#ASMEND 6639; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6640; GFX900-NEXT: ;;#ASMSTART 6641; GFX900-NEXT: ; use s8 6642; GFX900-NEXT: ;;#ASMEND 6643; GFX900-NEXT: s_setpc_b64 s[30:31] 6644; 6645; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_6: 6646; GFX90A: ; %bb.0: 6647; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6648; GFX90A-NEXT: ;;#ASMSTART 6649; GFX90A-NEXT: ; def s[4:5] 6650; GFX90A-NEXT: ;;#ASMEND 6651; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6652; GFX90A-NEXT: ;;#ASMSTART 6653; GFX90A-NEXT: ; def s[6:7] 6654; GFX90A-NEXT: ;;#ASMEND 6655; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6656; GFX90A-NEXT: ;;#ASMSTART 6657; GFX90A-NEXT: ; use s8 6658; GFX90A-NEXT: ;;#ASMEND 6659; GFX90A-NEXT: s_setpc_b64 s[30:31] 6660; 6661; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_6: 6662; GFX940: ; %bb.0: 6663; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6664; GFX940-NEXT: ;;#ASMSTART 6665; GFX940-NEXT: ; def s[0:1] 6666; GFX940-NEXT: ;;#ASMEND 6667; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6668; 
GFX940-NEXT: ;;#ASMSTART 6669; GFX940-NEXT: ; def s[2:3] 6670; GFX940-NEXT: ;;#ASMEND 6671; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 6672; GFX940-NEXT: ;;#ASMSTART 6673; GFX940-NEXT: ; use s8 6674; GFX940-NEXT: ;;#ASMEND 6675; GFX940-NEXT: s_setpc_b64 s[30:31] 6676 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6677 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6678 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 6> 6679 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6680 ret void 6681} 6682 6683define void @s_shuffle_v2bf16_v4bf16__2_6() { 6684; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_6: 6685; GFX900: ; %bb.0: 6686; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6687; GFX900-NEXT: ;;#ASMSTART 6688; GFX900-NEXT: ; def s[4:5] 6689; GFX900-NEXT: ;;#ASMEND 6690; GFX900-NEXT: ;;#ASMSTART 6691; GFX900-NEXT: ; def s[6:7] 6692; GFX900-NEXT: ;;#ASMEND 6693; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s7 6694; GFX900-NEXT: ;;#ASMSTART 6695; GFX900-NEXT: ; use s8 6696; GFX900-NEXT: ;;#ASMEND 6697; GFX900-NEXT: s_setpc_b64 s[30:31] 6698; 6699; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_6: 6700; GFX90A: ; %bb.0: 6701; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6702; GFX90A-NEXT: ;;#ASMSTART 6703; GFX90A-NEXT: ; def s[4:5] 6704; GFX90A-NEXT: ;;#ASMEND 6705; GFX90A-NEXT: ;;#ASMSTART 6706; GFX90A-NEXT: ; def s[6:7] 6707; GFX90A-NEXT: ;;#ASMEND 6708; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s7 6709; GFX90A-NEXT: ;;#ASMSTART 6710; GFX90A-NEXT: ; use s8 6711; GFX90A-NEXT: ;;#ASMEND 6712; GFX90A-NEXT: s_setpc_b64 s[30:31] 6713; 6714; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_6: 6715; GFX940: ; %bb.0: 6716; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6717; GFX940-NEXT: ;;#ASMSTART 6718; GFX940-NEXT: ; def s[0:1] 6719; GFX940-NEXT: ;;#ASMEND 6720; GFX940-NEXT: ;;#ASMSTART 6721; GFX940-NEXT: ; def s[2:3] 6722; GFX940-NEXT: ;;#ASMEND 6723; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s3 6724; GFX940-NEXT: ;;#ASMSTART 
6725; GFX940-NEXT: ; use s8 6726; GFX940-NEXT: ;;#ASMEND 6727; GFX940-NEXT: s_setpc_b64 s[30:31] 6728 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6729 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6730 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 6> 6731 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6732 ret void 6733} 6734 6735define void @s_shuffle_v2bf16_v4bf16__3_6() { 6736; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_6: 6737; GFX900: ; %bb.0: 6738; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6739; GFX900-NEXT: ;;#ASMSTART 6740; GFX900-NEXT: ; def s[4:5] 6741; GFX900-NEXT: ;;#ASMEND 6742; GFX900-NEXT: s_lshr_b32 s4, s5, 16 6743; GFX900-NEXT: ;;#ASMSTART 6744; GFX900-NEXT: ; def s[6:7] 6745; GFX900-NEXT: ;;#ASMEND 6746; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6747; GFX900-NEXT: ;;#ASMSTART 6748; GFX900-NEXT: ; use s8 6749; GFX900-NEXT: ;;#ASMEND 6750; GFX900-NEXT: s_setpc_b64 s[30:31] 6751; 6752; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_6: 6753; GFX90A: ; %bb.0: 6754; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6755; GFX90A-NEXT: ;;#ASMSTART 6756; GFX90A-NEXT: ; def s[4:5] 6757; GFX90A-NEXT: ;;#ASMEND 6758; GFX90A-NEXT: s_lshr_b32 s4, s5, 16 6759; GFX90A-NEXT: ;;#ASMSTART 6760; GFX90A-NEXT: ; def s[6:7] 6761; GFX90A-NEXT: ;;#ASMEND 6762; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s7 6763; GFX90A-NEXT: ;;#ASMSTART 6764; GFX90A-NEXT: ; use s8 6765; GFX90A-NEXT: ;;#ASMEND 6766; GFX90A-NEXT: s_setpc_b64 s[30:31] 6767; 6768; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_6: 6769; GFX940: ; %bb.0: 6770; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6771; GFX940-NEXT: ;;#ASMSTART 6772; GFX940-NEXT: ; def s[0:1] 6773; GFX940-NEXT: ;;#ASMEND 6774; GFX940-NEXT: s_lshr_b32 s0, s1, 16 6775; GFX940-NEXT: ;;#ASMSTART 6776; GFX940-NEXT: ; def s[2:3] 6777; GFX940-NEXT: ;;#ASMEND 6778; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s3 6779; GFX940-NEXT: ;;#ASMSTART 6780; GFX940-NEXT: ; use s8 6781; 
GFX940-NEXT: ;;#ASMEND 6782; GFX940-NEXT: s_setpc_b64 s[30:31] 6783 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6784 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6785 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 6> 6786 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6787 ret void 6788} 6789 6790define void @s_shuffle_v2bf16_v4bf16__4_6() { 6791; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_6: 6792; GFX900: ; %bb.0: 6793; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6794; GFX900-NEXT: ;;#ASMSTART 6795; GFX900-NEXT: ; def s[4:5] 6796; GFX900-NEXT: ;;#ASMEND 6797; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6798; GFX900-NEXT: ;;#ASMSTART 6799; GFX900-NEXT: ; use s8 6800; GFX900-NEXT: ;;#ASMEND 6801; GFX900-NEXT: s_setpc_b64 s[30:31] 6802; 6803; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_6: 6804; GFX90A: ; %bb.0: 6805; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6806; GFX90A-NEXT: ;;#ASMSTART 6807; GFX90A-NEXT: ; def s[4:5] 6808; GFX90A-NEXT: ;;#ASMEND 6809; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6810; GFX90A-NEXT: ;;#ASMSTART 6811; GFX90A-NEXT: ; use s8 6812; GFX90A-NEXT: ;;#ASMEND 6813; GFX90A-NEXT: s_setpc_b64 s[30:31] 6814; 6815; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_6: 6816; GFX940: ; %bb.0: 6817; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6818; GFX940-NEXT: ;;#ASMSTART 6819; GFX940-NEXT: ; def s[0:1] 6820; GFX940-NEXT: ;;#ASMEND 6821; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 6822; GFX940-NEXT: ;;#ASMSTART 6823; GFX940-NEXT: ; use s8 6824; GFX940-NEXT: ;;#ASMEND 6825; GFX940-NEXT: s_setpc_b64 s[30:31] 6826 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6827 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6828 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 6> 6829 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6830 ret void 6831} 6832 6833define void @s_shuffle_v2bf16_v4bf16__5_6() { 6834; GFX900-LABEL: 
s_shuffle_v2bf16_v4bf16__5_6: 6835; GFX900: ; %bb.0: 6836; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6837; GFX900-NEXT: ;;#ASMSTART 6838; GFX900-NEXT: ; def s[4:5] 6839; GFX900-NEXT: ;;#ASMEND 6840; GFX900-NEXT: s_lshr_b32 s4, s4, 16 6841; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6842; GFX900-NEXT: ;;#ASMSTART 6843; GFX900-NEXT: ; use s8 6844; GFX900-NEXT: ;;#ASMEND 6845; GFX900-NEXT: s_setpc_b64 s[30:31] 6846; 6847; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_6: 6848; GFX90A: ; %bb.0: 6849; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6850; GFX90A-NEXT: ;;#ASMSTART 6851; GFX90A-NEXT: ; def s[4:5] 6852; GFX90A-NEXT: ;;#ASMEND 6853; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 6854; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6855; GFX90A-NEXT: ;;#ASMSTART 6856; GFX90A-NEXT: ; use s8 6857; GFX90A-NEXT: ;;#ASMEND 6858; GFX90A-NEXT: s_setpc_b64 s[30:31] 6859; 6860; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_6: 6861; GFX940: ; %bb.0: 6862; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6863; GFX940-NEXT: ;;#ASMSTART 6864; GFX940-NEXT: ; def s[0:1] 6865; GFX940-NEXT: ;;#ASMEND 6866; GFX940-NEXT: s_lshr_b32 s0, s0, 16 6867; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 6868; GFX940-NEXT: ;;#ASMSTART 6869; GFX940-NEXT: ; use s8 6870; GFX940-NEXT: ;;#ASMEND 6871; GFX940-NEXT: s_setpc_b64 s[30:31] 6872 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6873 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6874 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 6> 6875 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6876 ret void 6877} 6878 6879define void @s_shuffle_v2bf16_v4bf16__6_6() { 6880; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_6: 6881; GFX900: ; %bb.0: 6882; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6883; GFX900-NEXT: ;;#ASMSTART 6884; GFX900-NEXT: ; def s[4:5] 6885; GFX900-NEXT: ;;#ASMEND 6886; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s5 6887; GFX900-NEXT: ;;#ASMSTART 6888; GFX900-NEXT: ; use s8 6889; 
GFX900-NEXT: ;;#ASMEND 6890; GFX900-NEXT: s_setpc_b64 s[30:31] 6891; 6892; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_6: 6893; GFX90A: ; %bb.0: 6894; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6895; GFX90A-NEXT: ;;#ASMSTART 6896; GFX90A-NEXT: ; def s[4:5] 6897; GFX90A-NEXT: ;;#ASMEND 6898; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s5 6899; GFX90A-NEXT: ;;#ASMSTART 6900; GFX90A-NEXT: ; use s8 6901; GFX90A-NEXT: ;;#ASMEND 6902; GFX90A-NEXT: s_setpc_b64 s[30:31] 6903; 6904; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_6: 6905; GFX940: ; %bb.0: 6906; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6907; GFX940-NEXT: ;;#ASMSTART 6908; GFX940-NEXT: ; def s[0:1] 6909; GFX940-NEXT: ;;#ASMEND 6910; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s1 6911; GFX940-NEXT: ;;#ASMSTART 6912; GFX940-NEXT: ; use s8 6913; GFX940-NEXT: ;;#ASMEND 6914; GFX940-NEXT: s_setpc_b64 s[30:31] 6915 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6916 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6917 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 6> 6918 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6919 ret void 6920} 6921 6922define void @s_shuffle_v2bf16_v4bf16__u_7() { 6923; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__u_7: 6924; GFX900: ; %bb.0: 6925; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6926; GFX900-NEXT: ;;#ASMSTART 6927; GFX900-NEXT: ; def s[4:5] 6928; GFX900-NEXT: ;;#ASMEND 6929; GFX900-NEXT: s_mov_b32 s8, s5 6930; GFX900-NEXT: ;;#ASMSTART 6931; GFX900-NEXT: ; use s8 6932; GFX900-NEXT: ;;#ASMEND 6933; GFX900-NEXT: s_setpc_b64 s[30:31] 6934; 6935; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__u_7: 6936; GFX90A: ; %bb.0: 6937; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6938; GFX90A-NEXT: ;;#ASMSTART 6939; GFX90A-NEXT: ; def s[4:5] 6940; GFX90A-NEXT: ;;#ASMEND 6941; GFX90A-NEXT: s_mov_b32 s8, s5 6942; GFX90A-NEXT: ;;#ASMSTART 6943; GFX90A-NEXT: ; use s8 6944; GFX90A-NEXT: ;;#ASMEND 6945; GFX90A-NEXT: s_setpc_b64 
s[30:31] 6946; 6947; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__u_7: 6948; GFX940: ; %bb.0: 6949; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6950; GFX940-NEXT: ;;#ASMSTART 6951; GFX940-NEXT: ; def s[0:1] 6952; GFX940-NEXT: ;;#ASMEND 6953; GFX940-NEXT: s_mov_b32 s8, s1 6954; GFX940-NEXT: ;;#ASMSTART 6955; GFX940-NEXT: ; use s8 6956; GFX940-NEXT: ;;#ASMEND 6957; GFX940-NEXT: s_setpc_b64 s[30:31] 6958 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 6959 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 6960 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 poison, i32 7> 6961 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 6962 ret void 6963} 6964 6965define void @s_shuffle_v2bf16_v4bf16__0_7() { 6966; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__0_7: 6967; GFX900: ; %bb.0: 6968; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6969; GFX900-NEXT: ;;#ASMSTART 6970; GFX900-NEXT: ; def s[4:5] 6971; GFX900-NEXT: ;;#ASMEND 6972; GFX900-NEXT: ;;#ASMSTART 6973; GFX900-NEXT: ; def s[6:7] 6974; GFX900-NEXT: ;;#ASMEND 6975; GFX900-NEXT: s_lshr_b32 s5, s7, 16 6976; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6977; GFX900-NEXT: ;;#ASMSTART 6978; GFX900-NEXT: ; use s8 6979; GFX900-NEXT: ;;#ASMEND 6980; GFX900-NEXT: s_setpc_b64 s[30:31] 6981; 6982; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__0_7: 6983; GFX90A: ; %bb.0: 6984; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 6985; GFX90A-NEXT: ;;#ASMSTART 6986; GFX90A-NEXT: ; def s[4:5] 6987; GFX90A-NEXT: ;;#ASMEND 6988; GFX90A-NEXT: ;;#ASMSTART 6989; GFX90A-NEXT: ; def s[6:7] 6990; GFX90A-NEXT: ;;#ASMEND 6991; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 6992; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 6993; GFX90A-NEXT: ;;#ASMSTART 6994; GFX90A-NEXT: ; use s8 6995; GFX90A-NEXT: ;;#ASMEND 6996; GFX90A-NEXT: s_setpc_b64 s[30:31] 6997; 6998; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__0_7: 6999; GFX940: ; %bb.0: 7000; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7001; GFX940-NEXT: ;;#ASMSTART 7002; 
GFX940-NEXT: ; def s[0:1] 7003; GFX940-NEXT: ;;#ASMEND 7004; GFX940-NEXT: ;;#ASMSTART 7005; GFX940-NEXT: ; def s[2:3] 7006; GFX940-NEXT: ;;#ASMEND 7007; GFX940-NEXT: s_lshr_b32 s1, s3, 16 7008; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 7009; GFX940-NEXT: ;;#ASMSTART 7010; GFX940-NEXT: ; use s8 7011; GFX940-NEXT: ;;#ASMEND 7012; GFX940-NEXT: s_setpc_b64 s[30:31] 7013 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 7014 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 7015 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 0, i32 7> 7016 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 7017 ret void 7018} 7019 7020define void @s_shuffle_v2bf16_v4bf16__1_7() { 7021; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__1_7: 7022; GFX900: ; %bb.0: 7023; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7024; GFX900-NEXT: ;;#ASMSTART 7025; GFX900-NEXT: ; def s[4:5] 7026; GFX900-NEXT: ;;#ASMEND 7027; GFX900-NEXT: ;;#ASMSTART 7028; GFX900-NEXT: ; def s[6:7] 7029; GFX900-NEXT: ;;#ASMEND 7030; GFX900-NEXT: s_lshr_b32 s5, s7, 16 7031; GFX900-NEXT: s_lshr_b32 s4, s4, 16 7032; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5 7033; GFX900-NEXT: ;;#ASMSTART 7034; GFX900-NEXT: ; use s8 7035; GFX900-NEXT: ;;#ASMEND 7036; GFX900-NEXT: s_setpc_b64 s[30:31] 7037; 7038; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__1_7: 7039; GFX90A: ; %bb.0: 7040; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7041; GFX90A-NEXT: ;;#ASMSTART 7042; GFX90A-NEXT: ; def s[4:5] 7043; GFX90A-NEXT: ;;#ASMEND 7044; GFX90A-NEXT: ;;#ASMSTART 7045; GFX90A-NEXT: ; def s[6:7] 7046; GFX90A-NEXT: ;;#ASMEND 7047; GFX90A-NEXT: s_lshr_b32 s5, s7, 16 7048; GFX90A-NEXT: s_lshr_b32 s4, s4, 16 7049; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5 7050; GFX90A-NEXT: ;;#ASMSTART 7051; GFX90A-NEXT: ; use s8 7052; GFX90A-NEXT: ;;#ASMEND 7053; GFX90A-NEXT: s_setpc_b64 s[30:31] 7054; 7055; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__1_7: 7056; GFX940: ; %bb.0: 7057; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
7058; GFX940-NEXT: ;;#ASMSTART 7059; GFX940-NEXT: ; def s[0:1] 7060; GFX940-NEXT: ;;#ASMEND 7061; GFX940-NEXT: ;;#ASMSTART 7062; GFX940-NEXT: ; def s[2:3] 7063; GFX940-NEXT: ;;#ASMEND 7064; GFX940-NEXT: s_lshr_b32 s1, s3, 16 7065; GFX940-NEXT: s_lshr_b32 s0, s0, 16 7066; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1 7067; GFX940-NEXT: ;;#ASMSTART 7068; GFX940-NEXT: ; use s8 7069; GFX940-NEXT: ;;#ASMEND 7070; GFX940-NEXT: s_setpc_b64 s[30:31] 7071 %vec0 = call <4 x bfloat> asm "; def $0", "=s"() 7072 %vec1 = call <4 x bfloat> asm "; def $0", "=s"() 7073 %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 1, i32 7> 7074 call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf) 7075 ret void 7076} 7077 7078define void @s_shuffle_v2bf16_v4bf16__2_7() { 7079; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__2_7: 7080; GFX900: ; %bb.0: 7081; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7082; GFX900-NEXT: ;;#ASMSTART 7083; GFX900-NEXT: ; def s[4:5] 7084; GFX900-NEXT: ;;#ASMEND 7085; GFX900-NEXT: ;;#ASMSTART 7086; GFX900-NEXT: ; def s[6:7] 7087; GFX900-NEXT: ;;#ASMEND 7088; GFX900-NEXT: s_lshr_b32 s4, s7, 16 7089; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7090; GFX900-NEXT: ;;#ASMSTART 7091; GFX900-NEXT: ; use s8 7092; GFX900-NEXT: ;;#ASMEND 7093; GFX900-NEXT: s_setpc_b64 s[30:31] 7094; 7095; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__2_7: 7096; GFX90A: ; %bb.0: 7097; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 7098; GFX90A-NEXT: ;;#ASMSTART 7099; GFX90A-NEXT: ; def s[4:5] 7100; GFX90A-NEXT: ;;#ASMEND 7101; GFX90A-NEXT: ;;#ASMSTART 7102; GFX90A-NEXT: ; def s[6:7] 7103; GFX90A-NEXT: ;;#ASMEND 7104; GFX90A-NEXT: s_lshr_b32 s4, s7, 16 7105; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4 7106; GFX90A-NEXT: ;;#ASMSTART 7107; GFX90A-NEXT: ; use s8 7108; GFX90A-NEXT: ;;#ASMEND 7109; GFX90A-NEXT: s_setpc_b64 s[30:31] 7110; 7111; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__2_7: 7112; GFX940: ; %bb.0: 7113; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) 
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 2, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Shuffle mask <3, 7> - element 3 of %vec0 paired with element 3 of %vec1
; (mask indices 4..7 address the second shufflevector operand). Both inputs
; come from "=s" inline asm and the result is consumed through "{s8}".
; The expected code shifts the second register of each def pair right by 16
; and combines the two results with s_pack_ll_b32_b16 into s8.
define void @s_shuffle_v2bf16_v4bf16__3_7() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__3_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[6:7]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s4, s7, 16
; GFX900-NEXT: s_lshr_b32 s5, s5, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__3_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[6:7]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s4, s7, 16
; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s5, s4
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__3_7:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[2:3]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s0, s3, 16
; GFX940-NEXT: s_lshr_b32 s1, s1, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s1, s0
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 3, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Shuffle mask <4, 7> - both lanes select from %vec1 (its elements 0 and 3).
; The expected output contains a single def block; presumably the unused %vec0
; def is dropped because that asm is not marked sideeffect (to be confirmed).
; The expected code shifts the second register of the def pair right by 16 and
; packs the first register with that result into s8.
define void @s_shuffle_v2bf16_v4bf16__4_7() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__4_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s5, s5, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__4_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__4_7:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s1, s1, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 4, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Shuffle mask <5, 7> - both lanes select from %vec1 (its elements 1 and 3).
; As with the other all-second-operand masks, the expected output has only one
; def block. The expected code shifts both registers of the def pair right by
; 16 and packs them (first register, then second) into s8.
define void @s_shuffle_v2bf16_v4bf16__5_7() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__5_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_lshr_b32 s5, s5, 16
; GFX900-NEXT: s_lshr_b32 s4, s4, 16
; GFX900-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__5_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_lshr_b32 s5, s5, 16
; GFX90A-NEXT: s_lshr_b32 s4, s4, 16
; GFX90A-NEXT: s_pack_ll_b32_b16 s8, s4, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__5_7:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_lshr_b32 s1, s1, 16
; GFX940-NEXT: s_lshr_b32 s0, s0, 16
; GFX940-NEXT: s_pack_ll_b32_b16 s8, s0, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 5, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}

; Shuffle mask <6, 7> - both lanes select from %vec1 (its elements 2 and 3,
; in order). The expected output has only one def block and no shift or pack:
; a single s_mov_b32 copies the second register of the def pair into s8.
define void @s_shuffle_v2bf16_v4bf16__6_7() {
; GFX900-LABEL: s_shuffle_v2bf16_v4bf16__6_7:
; GFX900: ; %bb.0:
; GFX900-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; def s[4:5]
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_mov_b32 s8, s5
; GFX900-NEXT: ;;#ASMSTART
; GFX900-NEXT: ; use s8
; GFX900-NEXT: ;;#ASMEND
; GFX900-NEXT: s_setpc_b64 s[30:31]
;
; GFX90A-LABEL: s_shuffle_v2bf16_v4bf16__6_7:
; GFX90A: ; %bb.0:
; GFX90A-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; def s[4:5]
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_mov_b32 s8, s5
; GFX90A-NEXT: ;;#ASMSTART
; GFX90A-NEXT: ; use s8
; GFX90A-NEXT: ;;#ASMEND
; GFX90A-NEXT: s_setpc_b64 s[30:31]
;
; GFX940-LABEL: s_shuffle_v2bf16_v4bf16__6_7:
; GFX940: ; %bb.0:
; GFX940-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; def s[0:1]
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_mov_b32 s8, s1
; GFX940-NEXT: ;;#ASMSTART
; GFX940-NEXT: ; use s8
; GFX940-NEXT: ;;#ASMEND
; GFX940-NEXT: s_setpc_b64 s[30:31]
  %vec0 = call <4 x bfloat> asm "; def $0", "=s"()
  %vec1 = call <4 x bfloat> asm "; def $0", "=s"()
  %shuf = shufflevector <4 x bfloat> %vec0, <4 x bfloat> %vec1, <2 x i32> <i32 6, i32 7>
  call void asm sideeffect "; use $0", "{s8}"(<2 x bfloat> %shuf)
  ret void
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX90APLUS: {{.*}}