; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -global-isel-abort=2 -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s

define amdgpu_kernel void @set_inactive(ptr addrspace(1) %out, i32 %in) {
; GCN-LABEL: set_inactive:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 42, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
  %tmp = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp.0)
  store i32 %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_imm_poison(ptr addrspace(1) %out) {
; GCN-LABEL: set_inactive_imm_poison:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 1
; GCN-NEXT:    v_mov_b32_e32 v0, v0
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_store_dword v0, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call i32 @llvm.amdgcn.set.inactive.i32(i32 1, i32 poison) #0
  %tmp = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp.0)
  store i32 %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_64(ptr addrspace(1) %out, i64 %in) {
; GCN-LABEL: set_inactive_64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v2, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, v3, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call i64 @llvm.amdgcn.set.inactive.i64(i64 %in, i64 0) #0
  %tmp = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %tmp.0)
  store i64 %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_imm_poison_64(ptr addrspace(1) %out) {
; GCN-LABEL: set_inactive_imm_poison_64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    v_mov_b32_e32 v0, 1
; GCN-NEXT:    v_mov_b32_e32 v1, 0
; GCN-NEXT:    v_mov_b32_e32 v0, v0
; GCN-NEXT:    v_mov_b32_e32 v1, v1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    buffer_store_dwordx2 v[0:1], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call i64 @llvm.amdgcn.set.inactive.i64(i64 1, i64 poison) #0
  %tmp = call i64 @llvm.amdgcn.strict.wwm.i64(i64 %tmp.0)
  store i64 %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_scc(ptr addrspace(1) %out, i32 %in, <4 x i32> inreg %desc) {
; GCN-LABEL: set_inactive_scc:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x34
; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_buffer_load_dword s7, s[0:3], 0x0
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 42, v1, s[2:3]
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_cmp_lg_u32 s7, 56
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s2, 1
; GCN-NEXT:    s_cbranch_scc0 .LBB4_2
; GCN-NEXT:  ; %bb.1: ; %.one
; GCN-NEXT:    v_add_u32_e32 v2, vcc, 1, v1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v2, off, s[0:3], 0
; GCN-NEXT:    s_mov_b32 s2, 0
; GCN-NEXT:  .LBB4_2: ; %Flow
; GCN-NEXT:    s_xor_b32 s2, s2, 1
; GCN-NEXT:    s_and_b32 s2, s2, 1
; GCN-NEXT:    s_cmp_lg_u32 s2, 0
; GCN-NEXT:    s_cbranch_scc1 .LBB4_4
; GCN-NEXT:  ; %bb.3: ; %.zero
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:  .LBB4_4: ; %.exit
; GCN-NEXT:    s_endpgm
  %val = call i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32> %desc, i32 0, i32 0)
  %cmp = icmp eq i32 %val, 56
  %tmp.0 = call i32 @llvm.amdgcn.set.inactive.i32(i32 %in, i32 42) #0
  %tmp = call i32 @llvm.amdgcn.strict.wwm.i32(i32 %tmp.0)
  br i1 %cmp, label %.zero, label %.one

.zero:
  store i32 %tmp, ptr addrspace(1) %out
  br label %.exit

.one:
  %tmp.1 = add i32 %tmp, 1
  store i32 %tmp.1, ptr addrspace(1) %out
  br label %.exit

.exit:
  ret void
}

define amdgpu_kernel void @set_inactive_f32(ptr addrspace(1) %out, float %in) {
; GCN-LABEL: set_inactive_f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x40400000
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call float @llvm.amdgcn.set.inactive.f32(float %in, float 3.0) #0
  %tmp = call float @llvm.amdgcn.strict.wwm.f32(float %tmp.0)
  store float %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_f64(ptr addrspace(1) %out, double %in) {
; GCN-LABEL: set_inactive_f64:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0xcccccccd
; GCN-NEXT:    v_mov_b32_e32 v1, 0x4010cccc
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[2:3]
; GCN-NEXT:    v_cndmask_b32_e64 v1, v1, v3, s[2:3]
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call double @llvm.amdgcn.set.inactive.f64(double %in, double 4.2) #0
  %tmp = call double @llvm.amdgcn.strict.wwm.f64(double %tmp.0)
  store double %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v2i16(ptr addrspace(1) %out, <2 x i16> %in) {
; GCN-LABEL: set_inactive_v2i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x10001
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <2 x i16> @llvm.amdgcn.set.inactive.v2i16(<2 x i16> %in, <2 x i16> <i16 1, i16 1>) #0
  %tmp = call <2 x i16> @llvm.amdgcn.strict.wwm.v2i16(<2 x i16> %tmp.0)
  store <2 x i16> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v2f16(ptr addrspace(1) %out, <2 x half> %in) {
; GCN-LABEL: set_inactive_v2f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[2:3], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3c003c00
; GCN-NEXT:    s_mov_b64 exec, s[2:3]
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <2 x half> @llvm.amdgcn.set.inactive.v2f16(<2 x half> %in, <2 x half> <half 1.0, half 1.0>) #0
  ; Use the correctly mangled .v2f16 overload; the previous .v2i16 suffix only
  ; parsed because the IR parser auto-remangles mismatched intrinsic names.
  %tmp = call <2 x half> @llvm.amdgcn.strict.wwm.v2f16(<2 x half> %tmp.0)
  store <2 x half> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v2i32(ptr addrspace(1) %out, <2 x i32> %in) {
; GCN-LABEL: set_inactive_v2i32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 1, v2, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, 1, v3, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <2 x i32> @llvm.amdgcn.set.inactive.v2i32(<2 x i32> %in, <2 x i32> <i32 1, i32 1>) #0
  %tmp = call <2 x i32> @llvm.amdgcn.strict.wwm.v2i32(<2 x i32> %tmp.0)
  store <2 x i32> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v2f32(ptr addrspace(1) %out, <2 x float> %in) {
; GCN-LABEL: set_inactive_v2f32:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 1.0, v2, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, 1.0, v3, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <2 x float> @llvm.amdgcn.set.inactive.v2f32(<2 x float> %in, <2 x float> <float 1.0, float 1.0>) #0
  %tmp = call <2 x float> @llvm.amdgcn.strict.wwm.v2f32(<2 x float> %tmp.0)
  store <2 x float> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v2bf16(ptr addrspace(1) %out, <2 x bfloat> %in) {
; GCN-LABEL: set_inactive_v2bf16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s6, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s6
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3f803f80
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <2 x bfloat> @llvm.amdgcn.set.inactive.v2bf16(<2 x bfloat> %in, <2 x bfloat> <bfloat 1.0, bfloat 1.0>) #0
  %tmp = call <2 x bfloat> @llvm.amdgcn.strict.wwm.v2bf16(<2 x bfloat> %tmp.0)
  store <2 x bfloat> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v4i16(ptr addrspace(1) %out, <4 x i16> %in) {
; GCN-LABEL: set_inactive_v4i16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x10001
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v3, s2
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, v3, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, v4, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    v_mov_b32_e32 v4, v2
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[3:4], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <4 x i16> @llvm.amdgcn.set.inactive.v4i16(<4 x i16> %in, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) #0
  %tmp = call <4 x i16> @llvm.amdgcn.strict.wwm.v4i16(<4 x i16> %tmp.0)
  store <4 x i16> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v4f16(ptr addrspace(1) %out, <4 x half> %in) {
; GCN-LABEL: set_inactive_v4f16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3c003c00
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v3, s2
; GCN-NEXT:    v_mov_b32_e32 v4, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, v3, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v2, v0, v4, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    v_mov_b32_e32 v4, v2
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[3:4], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <4 x half> @llvm.amdgcn.set.inactive.v4f16(<4 x half> %in, <4 x half> <half 1.0, half 1.0, half 1.0, half 1.0>) #0
  %tmp = call <4 x half> @llvm.amdgcn.strict.wwm.v4f16(<4 x half> %tmp.0)
  store <4 x half> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_v4bf16(ptr addrspace(1) %out, <4 x bfloat> %in) {
; GCN-LABEL: set_inactive_v4bf16:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s7, 0xf000
; GCN-NEXT:    s_mov_b32 s6, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    s_mov_b32 s4, s0
; GCN-NEXT:    s_mov_b32 s5, s1
; GCN-NEXT:    v_mov_b32_e32 v2, s3
; GCN-NEXT:    s_or_saveexec_b64 s[0:1], -1
; GCN-NEXT:    v_mov_b32_e32 v0, 0x3f803f80
; GCN-NEXT:    v_cndmask_b32_e64 v1, v0, v2, s[0:1]
; GCN-NEXT:    s_mov_b64 exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    s_or_saveexec_b64 s[0:1], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, v0, v2, s[0:1]
; GCN-NEXT:    s_mov_b64 exec, s[0:1]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[4:7], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call <4 x bfloat> @llvm.amdgcn.set.inactive.v4bf16(<4 x bfloat> %in, <4 x bfloat> <bfloat 1.0, bfloat 1.0, bfloat 1.0, bfloat 1.0>) #0
  %tmp = call <4 x bfloat> @llvm.amdgcn.strict.wwm.v4bf16(<4 x bfloat> %tmp.0)
  store <4 x bfloat> %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_p0(ptr addrspace(1) %out, ptr %in) {
; GCN-LABEL: set_inactive_p0:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dwordx4 s[0:3], s[4:5], 0x24
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v2, s2
; GCN-NEXT:    v_mov_b32_e32 v3, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v2, s[4:5]
; GCN-NEXT:    v_cndmask_b32_e64 v1, 0, v3, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v2, v0
; GCN-NEXT:    v_mov_b32_e32 v3, v1
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dwordx2 v[2:3], off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call ptr @llvm.amdgcn.set.inactive.p0(ptr %in, ptr null) #0
  %tmp = call ptr @llvm.amdgcn.strict.wwm.p0(ptr %tmp.0)
  store ptr %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_p2(ptr addrspace(1) %out, ptr addrspace(2) %in) {
; GCN-LABEL: set_inactive_p2:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call ptr addrspace(2) @llvm.amdgcn.set.inactive.p2(ptr addrspace(2) %in, ptr addrspace(2) null) #0
  %tmp = call ptr addrspace(2) @llvm.amdgcn.strict.wwm.p2(ptr addrspace(2) %tmp.0)
  store ptr addrspace(2) %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_p3(ptr addrspace(1) %out, ptr addrspace(3) %in) {
; GCN-LABEL: set_inactive_p3:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call ptr addrspace(3) @llvm.amdgcn.set.inactive.p3(ptr addrspace(3) %in, ptr addrspace(3) null) #0
  %tmp = call ptr addrspace(3) @llvm.amdgcn.strict.wwm.p3(ptr addrspace(3) %tmp.0)
  store ptr addrspace(3) %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_p5(ptr addrspace(1) %out, ptr addrspace(5) %in) {
; GCN-LABEL: set_inactive_p5:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call ptr addrspace(5) @llvm.amdgcn.set.inactive.p5(ptr addrspace(5) %in, ptr addrspace(5) null) #0
  %tmp = call ptr addrspace(5) @llvm.amdgcn.strict.wwm.p5(ptr addrspace(5) %tmp.0)
  store ptr addrspace(5) %tmp, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @set_inactive_p6(ptr addrspace(1) %out, ptr addrspace(6) %in) {
; GCN-LABEL: set_inactive_p6:
; GCN:       ; %bb.0:
; GCN-NEXT:    s_load_dword s3, s[4:5], 0x2c
; GCN-NEXT:    s_load_dwordx2 s[0:1], s[4:5], 0x24
; GCN-NEXT:    s_mov_b32 s2, -1
; GCN-NEXT:    s_waitcnt lgkmcnt(0)
; GCN-NEXT:    v_mov_b32_e32 v1, s3
; GCN-NEXT:    s_or_saveexec_b64 s[4:5], -1
; GCN-NEXT:    v_cndmask_b32_e64 v0, 0, v1, s[4:5]
; GCN-NEXT:    s_mov_b64 exec, s[4:5]
; GCN-NEXT:    v_mov_b32_e32 v1, v0
; GCN-NEXT:    s_mov_b32 s3, 0xf000
; GCN-NEXT:    buffer_store_dword v1, off, s[0:3], 0
; GCN-NEXT:    s_endpgm
  %tmp.0 = call ptr addrspace(6) @llvm.amdgcn.set.inactive.p6(ptr addrspace(6) %in, ptr addrspace(6) null) #0
  %tmp = call ptr addrspace(6) @llvm.amdgcn.strict.wwm.p6(ptr addrspace(6) %tmp.0)
  store ptr addrspace(6) %tmp, ptr addrspace(1) %out
  ret void
}

declare i32 @llvm.amdgcn.set.inactive.i32(i32, i32) #0
declare i64 @llvm.amdgcn.set.inactive.i64(i64, i64) #0
declare i32 @llvm.amdgcn.strict.wwm.i32(i32) #1
declare i64 @llvm.amdgcn.strict.wwm.i64(i64) #1
declare i32 @llvm.amdgcn.s.buffer.load.i32(<4 x i32>, i32, i32)

attributes #0 = { convergent readnone }
attributes #1 = { convergent nounwind readnone speculatable willreturn }