; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel=0 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-SDAG %s
; RUN: llc -global-isel=1 -amdgpu-load-store-vectorizer=0 -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX12-GISEL %s

; Codegen coverage for the llvm.amdgcn.permlane16.var and
; llvm.amdgcn.permlanex16.var intrinsics on gfx1200. Every kernel is compiled
; twice, once with SelectionDAG (-global-isel=0) and once with GlobalISel
; (-global-isel=1), and each selector has its own set of assertions.
;
; What the assertions below pin down:
;   * the two trailing i1 operands are printed as the op_sel:[a,b] modifier
;     (no modifier is printed when both are false);
;   * the third operand is always placed in a VGPR, including when it is a
;     constant or comes from an SGPR kernel argument;
;   * in the *_i_tid kernels the constant first operand is moved into the
;     destination VGPR ahead of the instruction.

declare i32 @llvm.amdgcn.permlane16.var(i32, i32, i32, i1, i1)
declare i32 @llvm.amdgcn.permlanex16.var(i32, i32, i32, i1, i1)
declare i32 @llvm.amdgcn.workitem.id.x()
declare i32 @llvm.amdgcn.workitem.id.y()

; ---------------------------------------------------------------------------
; permlane16.var, first and second operands both taken from %src0.
; ---------------------------------------------------------------------------

; Third operand is the %src1 kernel argument.
define amdgpu_kernel void @v_permlane16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the constant 1; it is still moved into a VGPR.
define amdgpu_kernel void @v_permlane16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the constant 49617 (printed as 0xc1d1).
define amdgpu_kernel void @v_permlane16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vl:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0xc1d1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vl:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the workitem id (v0 masked with 0x3ff in the output).
define amdgpu_kernel void @v_permlane16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vvv:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vvv:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand true, fifth false: expects op_sel:[1,0].
define amdgpu_kernel void @v_permlane16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand false, fifth true: expects op_sel:[0,1].
define amdgpu_kernel void @v_permlane16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Both i1 operands true: expects op_sel:[1,1].
define amdgpu_kernel void @v_permlane16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_vv_fi_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_vv_fi_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; ---------------------------------------------------------------------------
; permlanex16.var, first and second operands both taken from %src0.
; Same operand variations as the permlane16.var kernels above.
; ---------------------------------------------------------------------------

; Third operand is the %src1 kernel argument.
define amdgpu_kernel void @v_permlanex16var_b32_vv(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the constant 1; it is still moved into a VGPR.
define amdgpu_kernel void @v_permlanex16var_b32_vi(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 1 :: v_dual_mov_b32 v1, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the constant 49617 (printed as 0xc1d1).
define amdgpu_kernel void @v_permlanex16var_b32_vl(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vl:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v0, 0xc1d1
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vl:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, 0xc1d1 :: v_dual_mov_b32 v1, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 49617, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Third operand is the workitem id (v0 masked with 0x3ff in the output).
define amdgpu_kernel void @v_permlanex16var_b32_vvv(ptr addrspace(1) %out, i32 %src0) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vvv:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-SDAG-NEXT:    global_store_b32 v2, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vvv:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b96 s[0:2], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v1, v0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %tidx, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand true, fifth false: expects op_sel:[1,0].
define amdgpu_kernel void @v_permlanex16var_b32_vv_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand false, fifth true: expects op_sel:[0,1].
define amdgpu_kernel void @v_permlanex16var_b32_vv_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 false, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Both i1 operands true: expects op_sel:[1,1].
define amdgpu_kernel void @v_permlanex16var_b32_vv_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_vv_fi_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_mov_b32_e32 v2, 0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_vv_fi_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_load_b128 s[0:3], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v1, s3
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %src0, i32 %src0, i32 %src1, i1 true, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; ---------------------------------------------------------------------------
; permlane16.var with the workitem id as second operand and varying first
; operands. %src0 is declared but unused in these kernels; the third operand
; is always %src1.
; ---------------------------------------------------------------------------

; First and second operands are both the workitem id.
define amdgpu_kernel void @v_permlane16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_tid_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_tid_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; First operand is a frozen poison value; the expected output is identical to
; the tid_tid kernel above.
define amdgpu_kernel void @v_permlane16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_undef_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_undef_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; First operand is the constant 12345 (0x3039); it is moved into the
; destination VGPR (v1) before the permlane, which reads the workitem id
; from v0.
define amdgpu_kernel void @v_permlane16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v1, v0, v2
; GFX12-SDAG-NEXT:    global_store_b32 v3, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3039
; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v1, v0, v2
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand true, fifth false: expects op_sel:[1,0].
; NOTE(review): despite the "_i_" in the name, the first operand here is a
; frozen poison value, not an immediate.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand false, fifth true: expects op_sel:[0,1].
; NOTE(review): despite the "_i_" in the name, the first operand here is a
; frozen poison value, not an immediate.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Both i1 operands true: expects op_sel:[1,1].
; NOTE(review): despite the "_i_" in the name, the first operand here is a
; frozen poison value, not an immediate.
define amdgpu_kernel void @v_permlane16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlane16var_b32_i_tid_fi_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlane16var_b32_i_tid_fi_bc:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlane16_var_b32 v0, v0, v1 op_sel:[1,1]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlane16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; ---------------------------------------------------------------------------
; permlanex16.var with the workitem id as second operand and varying first
; operands. %src0 is declared but unused in these kernels; the third operand
; is always %src1.
; ---------------------------------------------------------------------------

; First and second operands are both the workitem id.
define amdgpu_kernel void @v_permlanex16var_b32_tid_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_tid_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_tid_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %tidx, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; First operand is a frozen poison value; the expected output is identical to
; the tid_tid kernel above.
define amdgpu_kernel void @v_permlanex16var_b32_undef_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_undef_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_undef_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; First operand is the constant 12345 (0x3039); it is moved into the
; destination VGPR (v1) before the permlane, which reads the workitem id
; from v0.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v3, 0 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v1, 0x3039 :: v_dual_mov_b32 v2, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v1, v0, v2
; GFX12-SDAG-NEXT:    global_store_b32 v3, v1, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0x3039
; GFX12-GISEL-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v2, s2
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v1, v0, v2
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v0, 0
; GFX12-GISEL-NEXT:    global_store_b32 v0, v1, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 12345, i32 %tidx, i32 %src1, i1 false, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

; Fourth operand true, fifth false: expects op_sel:[1,0].
; NOTE(review): despite the "_i_" in the name, the first operand here is a
; frozen poison value, not an immediate.
define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi:
; GFX12-GISEL:       ; %bb.0:
; GFX12-GISEL-NEXT:    s_clause 0x1
; GFX12-GISEL-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-GISEL-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-GISEL-NEXT:    s_wait_kmcnt 0x0
; GFX12-GISEL-NEXT:    v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0
; GFX12-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-GISEL-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,0]
; GFX12-GISEL-NEXT:    v_mov_b32_e32 v1, 0
; GFX12-GISEL-NEXT:    global_store_b32 v1, v0, s[0:1]
; GFX12-GISEL-NEXT:    s_endpgm
  %tidx = call i32 @llvm.amdgcn.workitem.id.x()
  %undef = freeze i32 poison
  %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 false)
  store i32 %v, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @v_permlanex16var_b32_i_tid_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) {
; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_bc:
; GFX12-SDAG:       ; %bb.0:
; GFX12-SDAG-NEXT:    s_clause 0x1
; GFX12-SDAG-NEXT:    s_load_b32 s2, s[4:5], 0x30
; GFX12-SDAG-NEXT:    s_load_b64 s[0:1], s[4:5], 0x24
; GFX12-SDAG-NEXT:    v_and_b32_e32 v0, 0x3ff, v0
; GFX12-SDAG-NEXT:    s_wait_kmcnt 0x0
; GFX12-SDAG-NEXT:    v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2
; GFX12-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
; GFX12-SDAG-NEXT:    v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1]
; GFX12-SDAG-NEXT:    global_store_b32 v2, v0, s[0:1]
; GFX12-SDAG-NEXT:    s_endpgm
;
; GFX12-GISEL-LABEL:
v_permlanex16var_b32_i_tid_bc: 735; GFX12-GISEL: ; %bb.0: 736; GFX12-GISEL-NEXT: s_clause 0x1 737; GFX12-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x30 738; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 739; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 740; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0 741; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 742; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[0,1] 743; GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0 744; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 745; GFX12-GISEL-NEXT: s_endpgm 746 %tidx = call i32 @llvm.amdgcn.workitem.id.x() 747 %undef = freeze i32 poison 748 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 false, i1 true) 749 store i32 %v, ptr addrspace(1) %out 750 ret void 751} 752 753define amdgpu_kernel void @v_permlanex16var_b32_i_tid_fi_bc(ptr addrspace(1) %out, i32 %src0, i32 %src1) { 754; GFX12-SDAG-LABEL: v_permlanex16var_b32_i_tid_fi_bc: 755; GFX12-SDAG: ; %bb.0: 756; GFX12-SDAG-NEXT: s_clause 0x1 757; GFX12-SDAG-NEXT: s_load_b32 s2, s[4:5], 0x30 758; GFX12-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 759; GFX12-SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0 760; GFX12-SDAG-NEXT: s_wait_kmcnt 0x0 761; GFX12-SDAG-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s2 762; GFX12-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) 763; GFX12-SDAG-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1] 764; GFX12-SDAG-NEXT: global_store_b32 v2, v0, s[0:1] 765; GFX12-SDAG-NEXT: s_endpgm 766; 767; GFX12-GISEL-LABEL: v_permlanex16var_b32_i_tid_fi_bc: 768; GFX12-GISEL: ; %bb.0: 769; GFX12-GISEL-NEXT: s_clause 0x1 770; GFX12-GISEL-NEXT: s_load_b32 s2, s[4:5], 0x30 771; GFX12-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 772; GFX12-GISEL-NEXT: s_wait_kmcnt 0x0 773; GFX12-GISEL-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_and_b32 v0, 0x3ff, v0 774; GFX12-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) 775; GFX12-GISEL-NEXT: v_permlanex16_var_b32 v0, v0, v1 op_sel:[1,1] 776; 
GFX12-GISEL-NEXT: v_mov_b32_e32 v1, 0 777; GFX12-GISEL-NEXT: global_store_b32 v1, v0, s[0:1] 778; GFX12-GISEL-NEXT: s_endpgm 779 %tidx = call i32 @llvm.amdgcn.workitem.id.x() 780 %undef = freeze i32 poison 781 %v = call i32 @llvm.amdgcn.permlanex16.var(i32 %undef, i32 %tidx, i32 %src1, i1 true, i1 true) 782 store i32 %v, ptr addrspace(1) %out 783 ret void 784} 785