; RUN: llc -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX8-OPT,GCN-OPT %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -O0 -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX8,GFX8-NOOPT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1010 -mattr=-flat-for-global -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX10,GCN-OPT %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-flat-for-global -amdgpu-enable-vopd=0 -amdgpu-dpp-combine=false -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11,GCN-OPT %s

; Code generation checks for the llvm.amdgcn.update.dpp intrinsic. The DPP
; combiner is switched off on every invocation above, so each call is expected
; to show up as one v_mov_b32_dpp per 32-bit piece of the operand.
;
; FIXME(review): none of the FileCheck invocations above enables the GCN-NOOPT
; prefix, so the GCN-NOOPT and GCN-NOOPT-DAG lines in this file are currently
; never checked. They are kept unchanged; confirm the -O0 output before wiring
; them to an enabled prefix (several of them reference captures made in other
; functions).

; 32-bit integer operands taken from kernel arguments, bound_ctrl off.
; GCN-LABEL: {{^}}dpp_test:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 1, i32 1, i32 1, i1 false) #0
  store i32 %tmp0, ptr addrspace(1) %out
  ret void
}

; Same as above with the last operand set, which must print bound_ctrl:1.
; GCN-LABEL: {{^}}dpp_test_bc:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[2,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_bc(ptr addrspace(1) %out, i32 %in1, i32 %in2) {
  %tmp0 = call i32 @llvm.amdgcn.update.dpp.i32(i32 %in1, i32 %in2, i32 2, i32 1, i32 1, i1 true) #0
  store i32 %tmp0, ptr addrspace(1) %out
  ret void
}


; The DPP source is produced by an add of a value loaded from LDS. On the GFX8
; runs an s_nop 1 is expected directly in front of the v_mov_b32_dpp that reads
; the add result.
; GCN-LABEL: {{^}}dpp_test1:
; GFX10: v_add_nc_u32_e32 [[REG:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX11: v_add_nc_u32_e32 [[REG:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-OPT: v_add_u32_e32 [[REG:v[0-9]+]], vcc, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_add_u32_e64 [[REG:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, v{{[0-9]+}}
; GFX8-NOOPT: v_mov_b32_e32 v{{[0-9]+}}, 0
; GFX8: s_nop 1
; GFX8-NEXT: v_mov_b32_dpp {{v[0-9]+}}, [[REG]] quad_perm:[1,0,3,2] row_mask:0xf bank_mask:0xf
@0 = internal unnamed_addr addrspace(3) global [448 x i32] undef, align 4
define weak_odr amdgpu_kernel void @dpp_test1(ptr %arg) local_unnamed_addr {
bb:
  %tmp = tail call i32 @llvm.amdgcn.workitem.id.x()
  %tmp1 = zext i32 %tmp to i64
  %tmp2 = getelementptr inbounds [448 x i32], ptr addrspace(3) @0, i32 0, i32 %tmp
  %tmp3 = load i32, ptr addrspace(3) %tmp2, align 4
  fence syncscope("workgroup-one-as") release
  tail call void @llvm.amdgcn.s.barrier()
  fence syncscope("workgroup-one-as") acquire
  %tmp4 = add nsw i32 %tmp3, %tmp3
  %tmp5 = tail call i32 @llvm.amdgcn.update.dpp.i32(i32 0, i32 %tmp4, i32 177, i32 15, i32 15, i1 zeroext false)
  %tmp6 = add nsw i32 %tmp5, %tmp4
  %tmp7 = getelementptr inbounds i32, ptr %arg, i64 %tmp1
  store i32 %tmp6, ptr %tmp7, align 4
  ret void
}

; 64-bit wide operands (i64, double, <2 x i32>, <2 x float>, ptr): the source is
; loaded as a register pair and each half is expected to get its own
; v_mov_b32_dpp.
; GCN-LABEL: {{^}}update_dppi64_test:
; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppi64_test(ptr addrspace(1) %arg, i64 %in1, i64 %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
  %load = load i64, ptr addrspace(1) %gep
  %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 %load, i32 1, i32 1, i32 1, i1 false) #0
  store i64 %tmp0, ptr addrspace(1) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dppf64_test:
; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppf64_test(ptr addrspace(1) %arg, double %in1, double %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
  %load = load double, ptr addrspace(1) %gep
  %tmp0 = call double @llvm.amdgcn.update.dpp.f64(double %in1, double %load, i32 1, i32 1, i32 1, i1 false) #0
  store double %tmp0, ptr addrspace(1) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dppv2i32_test:
; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppv2i32_test(ptr addrspace(1) %arg, <2 x i32> %in1, <2 x i32> %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x i32>, ptr addrspace(1) %arg, i32 %id
  %load = load <2 x i32>, ptr addrspace(1) %gep
  %tmp0 = call <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32> %in1, <2 x i32> %load, i32 1, i32 1, i32 1, i1 false) #0
  store <2 x i32> %tmp0, ptr addrspace(1) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dppv2f32_test:
; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppv2f32_test(ptr addrspace(1) %arg, <2 x float> %in1, <2 x float> %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds <2 x float>, ptr addrspace(1) %arg, i32 %id
  %load = load <2 x float>, ptr addrspace(1) %gep
  %tmp0 = call <2 x float> @llvm.amdgcn.update.dpp.v2f32(<2 x float> %in1, <2 x float> %load, i32 1, i32 1, i32 1, i1 false) #0
  store <2 x float> %tmp0, ptr addrspace(1) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dpp_p0_test:
; GCN: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT: v_mov_b32_dpp v{{[0-9]+}}, v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dpp_p0_test(ptr addrspace(1) %arg, ptr %in1, ptr %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds ptr, ptr addrspace(1) %arg, i32 %id
  %load = load ptr, ptr addrspace(1) %gep
  %tmp0 = call ptr @llvm.amdgcn.update.dpp.p0(ptr %in1, ptr %load, i32 1, i32 1, i32 1, i1 false) #0
  store ptr %tmp0, ptr addrspace(1) %gep
  ret void
}

; Pointers in address spaces 3 and 5: the source is loaded with a single-dword
; load and a single v_mov_b32_dpp is expected.
; GCN-LABEL: {{^}}update_dpp_p3_test:
; GCN: {{load|read}}_{{dword|b32}} v[[SRC:[0-9]+]]
; GCN: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dpp_p3_test(ptr addrspace(3) %arg, ptr addrspace(3) %in1, ptr %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %arg, i32 %id
  %load = load ptr addrspace(3), ptr addrspace(3) %gep
  %tmp0 = call ptr addrspace(3) @llvm.amdgcn.update.dpp.p3(ptr addrspace(3) %in1, ptr addrspace(3) %load, i32 1, i32 1, i32 1, i1 false) #0
  store ptr addrspace(3) %tmp0, ptr addrspace(3) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dpp_p5_test:
; GCN: {{load|read}}_{{dword|b32}} v[[SRC:[0-9]+]]
; GCN: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dpp_p5_test(ptr addrspace(5) %arg, ptr addrspace(5) %in1, ptr %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds ptr addrspace(5), ptr addrspace(5) %arg, i32 %id
  %load = load ptr addrspace(5), ptr addrspace(5) %gep
  %tmp0 = call ptr addrspace(5) @llvm.amdgcn.update.dpp.p5(ptr addrspace(5) %in1, ptr addrspace(5) %load, i32 1, i32 1, i32 1, i1 false) #0
  store ptr addrspace(5) %tmp0, ptr addrspace(5) %gep
  ret void
}

; 64-bit constant as the first ("old") operand: both halves of the constant are
; materialized and each is then overwritten in place by a v_mov_b32_dpp.
; The high-half checks use the GCN-OPT prefix, which is enabled by exactly the
; optimized tonga, gfx1010 and gfx1100 invocations; FileCheck does not accept a
; comma-separated list of prefixes in front of the colon.
; GCN-LABEL: {{^}}update_dppi64_imm_old_test:
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_LO:[0-9]+]], 0x3afaedd9
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x3afaedd9
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x7047
; GCN-DAG: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_LO]], v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppi64_imm_old_test(ptr addrspace(1) %arg, i64 %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds i64, ptr addrspace(1) %arg, i32 %id
  %load = load i64, ptr addrspace(1) %gep
  %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 123451234512345, i64 %load, i32 1, i32 1, i32 1, i1 false) #0
  store i64 %tmp0, ptr addrspace(1) %gep
  ret void
}

; GCN-LABEL: {{^}}update_dppf64_imm_old_test:
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_LO:[0-9]+]], 0x6b8564a
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x405edce1
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x6b8564a
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x405edce1
; GCN-DAG: load_{{dwordx2|b64}} v[[[SRC_LO:[0-9]+]]:[[SRC_HI:[0-9]+]]]
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_LO]], v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v[[OLD_HI]], v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppf64_imm_old_test(ptr addrspace(1) %arg, double %in2) {
  %id = tail call i32 @llvm.amdgcn.workitem.id.x()
  %gep = getelementptr inbounds double, ptr addrspace(1) %arg, i32 %id
  %load = load double, ptr addrspace(1) %gep
  %tmp0 = call double @llvm.amdgcn.update.dpp.f64(double 123.4512345123450, double %load, i32 1, i32 1, i32 1, i1 false) #0
  store double %tmp0, ptr addrspace(1) %gep
  ret void
}

; 64-bit constant as the second ("src") operand: both halves of the constant
; are materialized in VGPRs and used as the v_mov_b32_dpp sources.
; GCN-LABEL: {{^}}update_dppi64_imm_src_test:
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_LO:[0-9]+]], 0x3afaedd9
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x7047
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x3afaedd9
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x7047
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[OLD_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[OLD_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppi64_imm_src_test(ptr addrspace(1) %out, i64 %in1) {
  %tmp0 = call i64 @llvm.amdgcn.update.dpp.i64(i64 %in1, i64 123451234512345, i32 1, i32 1, i32 1, i1 false) #0
  store i64 %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}update_dppf64_imm_src_test:
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_LO:[0-9]+]], 0x6b8564a
; GCN-OPT-DAG: v_mov_b32_e32 v[[OLD_HI:[0-9]+]], 0x405edce1
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_LO:[0-9]+]], 0x6b8564a
; GFX8-NOOPT-DAG: s_mov_b32 s[[SOLD_HI:[0-9]+]], 0x405edce1
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[OLD_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-OPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[OLD_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_LO]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
; GCN-NOOPT-DAG: v_mov_b32_dpp v{{[0-9]+}}, v[[SRC_HI]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @update_dppf64_imm_src_test(ptr addrspace(1) %out, double %in1) {
  %tmp0 = call double @llvm.amdgcn.update.dpp.f64(double %in1, double 123.451234512345, i32 1, i32 1, i32 1, i1 false) #0
  store double %tmp0, ptr addrspace(1) %out
  ret void
}

; float operands. The *_imm_comb* variants sweep the four trailing immediate
; operands and check how they are printed as quad_perm, row_mask, bank_mask and
; bound_ctrl.
; GCN-LABEL: {{^}}dpp_test_f32:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test_f32(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 1, i32 1, i32 1, i1 false)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb1:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb1(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 0, i32 0, i32 0, i1 false)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb2:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb2(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 3, i32 3, i32 3, i1 false)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb3:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb3(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 1, i32 2, i32 3, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb4:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb4(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 4, i32 3, i32 2, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb5:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb5(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 14, i32 13, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb6:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb6(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 63, i32 15, i32 15, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}


; GCN-LABEL: {{^}}dpp_test_f32_imm_comb7:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb7(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 64, i32 0, i32 0, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_f32_imm_comb8:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_f32_imm_comb8(ptr addrspace(1) %out, float %in1, float %in2) {
  %tmp0 = call float @llvm.amdgcn.update.dpp.f32(float %in1, float %in2, i32 31, i32 15, i32 0, i1 true)
  store float %tmp0, ptr addrspace(1) %out
  ret void
}

; <2 x i16> operands, same immediate sweep as the float tests.
; GCN-LABEL: {{^}}dpp_test_v2i16:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test_v2i16(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 1, i32 1, i32 1, i1 false)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb1:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb1(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 0, i32 0, i32 0, i1 false)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb2:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb2(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 3, i32 3, i32 3, i1 false)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb3:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb3(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 1, i32 2, i32 3, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb4:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb4(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 4, i32 3, i32 2, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb5:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb5(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 14, i32 13, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb6:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb6(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 63, i32 15, i32 15, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb7:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb7(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 64, i32 0, i32 0, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2i16_imm_comb8:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2i16_imm_comb8(ptr addrspace(1) %out, <2 x i16> %in1, <2 x i16> %in2) {
  %tmp0 = call <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16> %in1, <2 x i16> %in2, i32 31, i32 15, i32 0, i1 true)
  store <2 x i16> %tmp0, ptr addrspace(1) %out
  ret void
}

; <2 x half> operands, same immediate sweep as the float tests.
; GCN-LABEL: {{^}}dpp_test_v2f16:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1{{$}}
define amdgpu_kernel void @dpp_test_v2f16(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 1, i32 1, i32 1, i1 false)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb1:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,0] row_mask:0x0 bank_mask:0x0{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb1(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 0, i32 0, i32 0, i1 false)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb2:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,0,0,0] row_mask:0x3 bank_mask:0x3{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb2(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 3, i32 3, i32 3, i1 false)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb3:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[1,0,0,0] row_mask:0x2 bank_mask:0x3 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb3(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 1, i32 2, i32 3, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb4:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,1,0,0] row_mask:0x3 bank_mask:0x2 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb4(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 4, i32 3, i32 2, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb5:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xe bank_mask:0xd bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb5(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 14, i32 13, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb6:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,3,0] row_mask:0xf bank_mask:0xf bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb6(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 63, i32 15, i32 15, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb7:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[0,0,0,1] row_mask:0x0 bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb7(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 64, i32 0, i32 0, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; GCN-LABEL: {{^}}dpp_test_v2f16_imm_comb8:
; GCN: v_mov_b32_e32 [[DST:v[0-9]+]], s{{[0-9]+}}
; GCN: v_mov_b32_e32 [[SRC:v[0-9]+]], s{{[0-9]+}}
; GFX8-OPT: s_mov
; GFX8-OPT: s_mov
; GFX8-NOOPT: s_nop 1
; GCN: v_mov_b32_dpp [[DST]], [[SRC]] quad_perm:[3,3,1,0] row_mask:0xf bank_mask:0x0 bound_ctrl:1{{$}}
define amdgpu_kernel void @dpp_test_v2f16_imm_comb8(ptr addrspace(1) %out, <2 x half> %in1, <2 x half> %in2) {
  %tmp0 = call <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half> %in1, <2 x half> %in2, i32 31, i32 15, i32 0, i1 true)
  store <2 x half> %tmp0, ptr addrspace(1) %out
  ret void
}

; Every intrinsic overload called above is declared explicitly.
declare i32 @llvm.amdgcn.workitem.id.x()
declare void @llvm.amdgcn.s.barrier()
declare i32 @llvm.amdgcn.update.dpp.i32(i32, i32, i32, i32, i32, i1) #0
declare <2 x i16> @llvm.amdgcn.update.dpp.v2i16(<2 x i16>, <2 x i16>, i32, i32, i32, i1) #0
declare <2 x half> @llvm.amdgcn.update.dpp.v2f16(<2 x half>, <2 x half>, i32, i32, i32, i1) #0
declare float @llvm.amdgcn.update.dpp.f32(float, float, i32, i32, i32, i1) #0
declare i64 @llvm.amdgcn.update.dpp.i64(i64, i64, i32, i32, i32, i1) #0
declare double @llvm.amdgcn.update.dpp.f64(double, double, i32, i32, i32, i1) #0
declare <2 x i32> @llvm.amdgcn.update.dpp.v2i32(<2 x i32>, <2 x i32>, i32, i32, i32, i1) #0
declare <2 x float> @llvm.amdgcn.update.dpp.v2f32(<2 x float>, <2 x float>, i32, i32, i32, i1) #0
declare ptr @llvm.amdgcn.update.dpp.p0(ptr, ptr, i32, i32, i32, i1) #0
declare ptr addrspace(3) @llvm.amdgcn.update.dpp.p3(ptr addrspace(3), ptr addrspace(3), i32, i32, i32, i1) #0
declare ptr addrspace(5) @llvm.amdgcn.update.dpp.p5(ptr addrspace(5), ptr addrspace(5), i32, i32, i32, i1) #0

attributes #0 = { nounwind readnone convergent }