; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -amdgpu-codegenprepare -S < %s | FileCheck -check-prefix=OPT %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,DAGISEL-ASM
; RUN: llc -mtriple=amdgcn-amd-amdhsa -global-isel -mcpu=gfx900 < %s | FileCheck %s --check-prefixes=ASM,GISEL-ASM

; Tests that we can avoid null-pointer checks for addrspacecasts from/to private/local.
;
; Whenever a test case succeeds, the addrspacecast should be replaced with the
; llvm.amdgcn.addrspacecast.nonnull intrinsic, and the resulting code should
; have no select/v_cndmask null check on the pointer.

define void @local_to_flat_nonnull_arg(ptr addrspace(3) nonnull %ptr) {
; OPT-LABEL: define void @local_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(3) nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p3(ptr addrspace(3) [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: local_to_flat_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_shared_base
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(3) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

define void @private_to_flat_nonnull_arg(ptr addrspace(5) nonnull %ptr) {
; OPT-LABEL: define void @private_to_flat_nonnull_arg(
; OPT-SAME: ptr addrspace(5) nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: private_to_flat_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr addrspace(5) %ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}

define void @flat_to_local_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_local_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(3) @llvm.amdgcn.addrspacecast.nonnull.p3.p0(ptr [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(3) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: flat_to_local_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: ds_write_b32 v0, v1
; ASM-NEXT: s_waitcnt lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(3)
  store volatile i32 7, ptr addrspace(3) %x
  ret void
}

define void @flat_to_private_nonnull_arg(ptr nonnull %ptr) {
; OPT-LABEL: define void @flat_to_private_nonnull_arg(
; OPT-SAME: ptr nonnull [[PTR:%.*]]) {
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: flat_to_private_nonnull_arg:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT: s_waitcnt vmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %x = addrspacecast ptr %ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}
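
; An alloca in the private address space can never be null, so OPT should emit
; the non-null intrinsic here even without a nonnull attribute on the operand.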
define void @private_alloca_to_flat(ptr %ptr) {
; OPT-LABEL: define void @private_alloca_to_flat(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[ALLOCA]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; DAGISEL-ASM-LABEL: private_alloca_to_flat:
; DAGISEL-ASM: ; %bb.0:
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: private_alloca_to_flat:
; GISEL-ASM: ; %bb.0:
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: s_lshr_b32 s4, s32, 6
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], src_private_base
; GISEL-ASM-NEXT: s_mov_b32 s5, s7
; GISEL-ASM-NEXT: v_mov_b32_e32 v0, s4
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
  %alloca = alloca i8, addrspace(5)
  %x = addrspacecast ptr addrspace(5) %alloca to ptr
  store volatile i32 7, ptr %x
  ret void
}

@lds = internal unnamed_addr addrspace(3) global i8 poison, align 4

define void @knownbits_on_flat_to_priv(ptr %ptr) {
; OPT-LABEL: define void @knownbits_on_flat_to_priv(
; OPT-SAME: ptr [[PTR:%.*]]) {
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr [[PTR]] to i64
; OPT-NEXT: [[PTR_OR:%.*]] = or i64 [[PTR_INT]], 15
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i64 [[PTR_OR]] to ptr
; OPT-NEXT: [[TMP1:%.*]] = call ptr addrspace(5) @llvm.amdgcn.addrspacecast.nonnull.p5.p0(ptr [[KB_PTR]])
; OPT-NEXT: store volatile i32 7, ptr addrspace(5) [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: knownbits_on_flat_to_priv:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_or_b32_e32 v0, 15, v0
; ASM-NEXT: v_mov_b32_e32 v1, 7
; ASM-NEXT: buffer_store_dword v1, v0, s[0:3], 0 offen
; ASM-NEXT: s_waitcnt vmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr %ptr to i64
  %ptr.or = or i64 %ptr.int, 15 ; set some low bits, so the flat pointer cannot be null (0)
  %kb.ptr = inttoptr i64 %ptr.or to ptr
  %x = addrspacecast ptr %kb.ptr to ptr addrspace(5)
  store volatile i32 7, ptr addrspace(5) %x
  ret void
}
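
; On AMDGPU, the null value of the private and local address spaces is -1
; rather than 0, so a private pointer masked down to its low 16 bits is
; provably non-null even though its value may be zero.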
define void @knownbits_on_priv_to_flat(ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @knownbits_on_priv_to_flat(
; OPT-SAME: ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT: [[TMP1:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[KB_PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP1]], align 4
; OPT-NEXT: ret void
;
; ASM-LABEL: knownbits_on_priv_to_flat:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; ASM-NEXT: v_and_b32_e32 v0, 0xffff, v0
; ASM-NEXT: v_mov_b32_e32 v1, s5
; ASM-NEXT: v_mov_b32_e32 v2, 7
; ASM-NEXT: flat_store_dword v[0:1], v2
; ASM-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only the lower 16 bits can be set
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  %x = addrspacecast ptr addrspace(5) %kb.ptr to ptr
  store volatile i32 7, ptr %x
  ret void
}
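
; Every incoming value of the phi cycle is known non-null (an alloca or a
; masked private pointer), so the cast of the phi should still use the
; non-null intrinsic.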
define void @recursive_phis(i1 %cond, ptr addrspace(5) %ptr) {
; OPT-LABEL: define void @recursive_phis(
; OPT-SAME: i1 [[COND:%.*]], ptr addrspace(5) [[PTR:%.*]]) {
; OPT-NEXT: [[ENTRY:.*]]:
; OPT-NEXT: [[ALLOCA:%.*]] = alloca i8, align 1, addrspace(5)
; OPT-NEXT: br i1 [[COND]], label %[[THEN:.*]], label %[[ELSE:.*]]
; OPT: [[THEN]]:
; OPT-NEXT: [[PTR_INT:%.*]] = ptrtoint ptr addrspace(5) [[PTR]] to i32
; OPT-NEXT: [[PTR_OR:%.*]] = and i32 [[PTR_INT]], 65535
; OPT-NEXT: [[KB_PTR:%.*]] = inttoptr i32 [[PTR_OR]] to ptr addrspace(5)
; OPT-NEXT: br label %[[FINALLY:.*]]
; OPT: [[ELSE]]:
; OPT-NEXT: [[OTHER_PHI:%.*]] = phi ptr addrspace(5) [ [[ALLOCA]], %[[ENTRY]] ], [ [[PHI_PTR:%.*]], %[[FINALLY]] ]
; OPT-NEXT: br label %[[FINALLY]]
; OPT: [[FINALLY]]:
; OPT-NEXT: [[PHI_PTR]] = phi ptr addrspace(5) [ [[KB_PTR]], %[[THEN]] ], [ [[OTHER_PHI]], %[[ELSE]] ]
; OPT-NEXT: [[TMP0:%.*]] = call ptr @llvm.amdgcn.addrspacecast.nonnull.p0.p5(ptr addrspace(5) [[PHI_PTR]])
; OPT-NEXT: store volatile i32 7, ptr [[TMP0]], align 4
; OPT-NEXT: br i1 [[COND]], label %[[ELSE]], label %[[END:.*]]
; OPT: [[END]]:
; OPT-NEXT: ret void
;
; DAGISEL-ASM-LABEL: recursive_phis:
; DAGISEL-ASM: ; %bb.0: ; %entry
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
; DAGISEL-ASM-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0
; DAGISEL-ASM-NEXT: v_lshrrev_b32_e64 v0, 6, s32
; DAGISEL-ASM-NEXT: s_and_saveexec_b64 s[4:5], vcc
; DAGISEL-ASM-NEXT: ; %bb.1: ; %then
; DAGISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
; DAGISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_xor_b64 s[6:7], vcc, -1
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], 0
; DAGISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; DAGISEL-ASM-NEXT: .LBB7_3: ; %finally
; DAGISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; DAGISEL-ASM-NEXT: s_and_b64 s[10:11], exec, s[6:7]
; DAGISEL-ASM-NEXT: s_or_b64 s[4:5], s[10:11], s[4:5]
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; DAGISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; DAGISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
; DAGISEL-ASM-NEXT: ; %bb.4: ; %end
; DAGISEL-ASM-NEXT: s_or_b64 exec, exec, s[4:5]
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: recursive_phis:
; GISEL-ASM: ; %bb.0: ; %entry
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 1, v0
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
; GISEL-ASM-NEXT: s_lshr_b32 s6, s32, 6
; GISEL-ASM-NEXT: s_xor_b64 s[4:5], vcc, -1
; GISEL-ASM-NEXT: v_mov_b32_e32 v0, s6
; GISEL-ASM-NEXT: s_and_saveexec_b64 s[6:7], vcc
; GISEL-ASM-NEXT: ; %bb.1: ; %then
; GISEL-ASM-NEXT: v_and_b32_e32 v0, 0xffff, v1
; GISEL-ASM-NEXT: ; %bb.2: ; %finallyendcf.split
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_mov_b64 s[8:9], src_private_base
; GISEL-ASM-NEXT: s_mov_b64 s[6:7], 0
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s9
; GISEL-ASM-NEXT: v_mov_b32_e32 v2, 7
; GISEL-ASM-NEXT: .LBB7_3: ; %finally
; GISEL-ASM-NEXT: ; =>This Inner Loop Header: Depth=1
; GISEL-ASM-NEXT: s_and_b64 s[8:9], exec, s[4:5]
; GISEL-ASM-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7]
; GISEL-ASM-NEXT: flat_store_dword v[0:1], v2
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0)
; GISEL-ASM-NEXT: s_andn2_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_cbranch_execnz .LBB7_3
; GISEL-ASM-NEXT: ; %bb.4: ; %end
; GISEL-ASM-NEXT: s_or_b64 exec, exec, s[6:7]
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
entry:
  %alloca = alloca i8, addrspace(5)
  br i1 %cond, label %then, label %else

then:
  %ptr.int = ptrtoint ptr addrspace(5) %ptr to i32
  %ptr.or = and i32 %ptr.int, 65535 ; ensure only the lower 16 bits can be set
  %kb.ptr = inttoptr i32 %ptr.or to ptr addrspace(5)
  br label %finally

else:
  %other.phi = phi ptr addrspace(5) [%alloca, %entry], [%phi.ptr, %finally]
  br label %finally

finally:
  %phi.ptr = phi ptr addrspace(5) [%kb.ptr, %then], [%other.phi, %else]
  %x = addrspacecast ptr addrspace(5) %phi.ptr to ptr
  store volatile i32 7, ptr %x
  br i1 %cond, label %else, label %end

end:
  ret void
}

; This used to assert because the pass assumed the source address space was
; always larger than the destination.

define i32 @cast_private_to_flat_to_private(ptr addrspace(5) %private.ptr) {
; OPT-LABEL: define i32 @cast_private_to_flat_to_private(
; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
; OPT-NEXT: [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(5)
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(5) [[CAST_BACK]], align 4
; OPT-NEXT: ret i32 [[LOAD]]
;
; ASM-LABEL: cast_private_to_flat_to_private:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: buffer_load_dword v0, v0, s[0:3], 0 offen glc
; ASM-NEXT: s_waitcnt vmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(5)
  %load = load volatile i32, ptr addrspace(5) %cast.back
  ret i32 %load
}
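
; The casts below round-trip through flat into a different address space than
; the source, so no non-null fact carries over and the generated code keeps
; its cndmask-based null checks.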
; This is UB but shouldn't assert.
define i32 @cast_private_to_flat_to_local(ptr addrspace(5) %private.ptr) {
; OPT-LABEL: define i32 @cast_private_to_flat_to_local(
; OPT-SAME: ptr addrspace(5) [[PRIVATE_PTR:%.*]]) {
; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(5) [[PRIVATE_PTR]] to ptr
; OPT-NEXT: [[CAST_BACK:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[CAST_BACK]], align 4
; OPT-NEXT: ret i32 [[LOAD]]
;
; DAGISEL-ASM-LABEL: cast_private_to_flat_to_local:
; DAGISEL-ASM: ; %bb.0:
; DAGISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; DAGISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
; DAGISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; DAGISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; DAGISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; DAGISEL-ASM-NEXT: ds_read_b32 v0, v0
; DAGISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; DAGISEL-ASM-NEXT: s_setpc_b64 s[30:31]
;
; GISEL-ASM-LABEL: cast_private_to_flat_to_local:
; GISEL-ASM: ; %bb.0:
; GISEL-ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GISEL-ASM-NEXT: s_mov_b64 s[4:5], src_private_base
; GISEL-ASM-NEXT: v_mov_b32_e32 v1, s5
; GISEL-ASM-NEXT: v_cmp_ne_u32_e32 vcc, -1, v0
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc
; GISEL-ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; GISEL-ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; GISEL-ASM-NEXT: ds_read_b32 v0, v0
; GISEL-ASM-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-ASM-NEXT: s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(5) %private.ptr to ptr
  %cast.back = addrspacecast ptr %flat.ptr to ptr addrspace(3)
  %load = load volatile i32, ptr addrspace(3) %cast.back
  ret i32 %load
}

; This is UB but shouldn't assert.
define i32 @cast_const32_to_flat_to_local(ptr addrspace(6) %const32.ptr) {
; OPT-LABEL: define i32 @cast_const32_to_flat_to_local(
; OPT-SAME: ptr addrspace(6) [[CONST32_PTR:%.*]]) {
; OPT-NEXT: [[FLAT_PTR:%.*]] = addrspacecast ptr addrspace(6) [[CONST32_PTR]] to ptr
; OPT-NEXT: [[LOCAL_PTR:%.*]] = addrspacecast ptr [[FLAT_PTR]] to ptr addrspace(3)
; OPT-NEXT: [[LOAD:%.*]] = load volatile i32, ptr addrspace(3) [[LOCAL_PTR]], align 4
; OPT-NEXT: ret i32 [[LOAD]]
;
; ASM-LABEL: cast_const32_to_flat_to_local:
; ASM: ; %bb.0:
; ASM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; ASM-NEXT: v_mov_b32_e32 v1, 0
; ASM-NEXT: v_cmp_ne_u64_e32 vcc, 0, v[0:1]
; ASM-NEXT: v_cndmask_b32_e32 v0, -1, v0, vcc
; ASM-NEXT: ds_read_b32 v0, v0
; ASM-NEXT: s_waitcnt lgkmcnt(0)
; ASM-NEXT: s_setpc_b64 s[30:31]
  %flat.ptr = addrspacecast ptr addrspace(6) %const32.ptr to ptr
  %local.ptr = addrspacecast ptr %flat.ptr to ptr addrspace(3)
  %load = load volatile i32, ptr addrspace(3) %local.ptr
  ret i32 %load
}