; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s

; Test the localizer did something and we don't materialize all
; constants in SGPRs in the entry block.
;
; %bb0 and %bb1 each store the same six constants (123, 456, 999, 1000, 455,
; 23526), in a different order per block. The assertions expect every constant
; to be materialized by a v_mov_b32 directly before the store that uses it,
; inside the block that contains that store (0x7b = 123, 0x1c8 = 456,
; 0x3e7 = 999, 0x3e8 = 1000, 0x1c7 = 455, 0x5be6 = 23526).

define amdgpu_kernel void @localize_constants(i1 %cond) {
; GFX9-LABEL: localize_constants:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[8:9], 0x0
; GFX9-NEXT:    s_mov_b32 s0, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, 1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 .LBB0_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:  .LBB0_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, 1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 .LBB0_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x7b
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x3e8
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x1c7
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_mov_b32_e32 v0, 0x5be6
; GFX9-NEXT:    global_store_dword v[0:1], v0, off
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB0_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  ; Two-way branch on the kernel argument; both successors rejoin at %bb2.
  br i1 %cond, label %bb0, label %bb1

bb0:
  ; Volatile stores keep every constant store (and its order) in the output.
  store volatile i32 123, ptr addrspace(1) undef
  store volatile i32 456, ptr addrspace(1) undef
  store volatile i32 999, ptr addrspace(1) undef
  store volatile i32 1000, ptr addrspace(1) undef
  store volatile i32 455, ptr addrspace(1) undef
  store volatile i32 23526, ptr addrspace(1) undef
  br label %bb2

bb1:
  ; Same constant set as %bb0, stored in a different order.
  store volatile i32 23526, ptr addrspace(1) undef
  store volatile i32 455, ptr addrspace(1) undef
  store volatile i32 1000, ptr addrspace(1) undef
  store volatile i32 456, ptr addrspace(1) undef
  store volatile i32 999, ptr addrspace(1) undef
  store volatile i32 123, ptr addrspace(1) undef
  br label %bb2

bb2:
  ret void
}

; FIXME: These aren't localized because they were legalized before
; the localizer, and are no longer G_GLOBAL_VALUE.
; Globals with default (external) linkage. The assertions for
; @localize_globals obtain their addresses with a GOT load (gotpcrel32
; relocations followed by s_load_dwordx2).
@gv0 = addrspace(1) global i32 undef, align 4
@gv1 = addrspace(1) global i32 undef, align 4
@gv2 = addrspace(1) global i32 undef, align 4
@gv3 = addrspace(1) global i32 undef, align 4

; Each branch target stores to two distinct external globals.
; NOTE(review): the assertions below show every s_getpc_b64 / GOT-load sequence
; emitted inside the block that stores to the global (%bb1 or %bb0) rather than
; in %entry, although both sequences in a block precede both stores. Confirm
; whether the FIXME preceding these globals still describes current behavior.
define amdgpu_kernel void @localize_globals(i1 %cond) {
; GFX9-LABEL: localize_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_load_dword s1, s[8:9], 0x0
; GFX9-NEXT:    s_mov_b32 s0, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    s_xor_b32 s1, s1, 1
; GFX9-NEXT:    s_and_b32 s1, s1, 1
; GFX9-NEXT:    s_cmp_lg_u32 s1, 0
; GFX9-NEXT:    s_cbranch_scc0 .LBB1_2
; GFX9-NEXT:  ; %bb.1: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv2@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv2@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[0:1], 0x0
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv3@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv3@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_mov_b32 s0, 0
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[4:5]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB1_2: ; %Flow
; GFX9-NEXT:    s_xor_b32 s0, s0, 1
; GFX9-NEXT:    s_and_b32 s0, s0, 1
; GFX9-NEXT:    s_cmp_lg_u32 s0, 0
; GFX9-NEXT:    s_cbranch_scc1 .LBB1_4
; GFX9-NEXT:  ; %bb.3: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[0:1]
; GFX9-NEXT:    s_add_u32 s0, s0, gv0@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s1, s1, gv0@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[0:1], s[0:1], 0x0
; GFX9-NEXT:    s_getpc_b64 s[2:3]
; GFX9-NEXT:    s_add_u32 s2, s2, gv1@gotpcrel32@lo+4
; GFX9-NEXT:    s_addc_u32 s3, s3, gv1@gotpcrel32@hi+12
; GFX9-NEXT:    s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v0, s[0:1]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    global_store_dword v0, v1, s[2:3]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:  .LBB1_4: ; %bb2
; GFX9-NEXT:    s_endpgm
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  ; Uses @gv0 and @gv1 only.
  store volatile i32 0, ptr addrspace(1) @gv0
  store volatile i32 1, ptr addrspace(1) @gv1
  br label %bb2

bb1:
  ; Uses @gv2 and @gv3 only.
  store volatile i32 0, ptr addrspace(1) @gv2
  store volatile i32 1, ptr addrspace(1) @gv3
  br label %bb2

bb2:
  ret void
}

; Internal-linkage counterparts of the globals above. The assertions for
; @localize_internal_globals address them PC-relatively (rel32 relocations)
; with no GOT load.
@static.gv0 = internal addrspace(1) global i32 undef, align 4
@static.gv1 = internal addrspace(1) global i32 undef, align 4
@static.gv2 = internal addrspace(1) global i32 undef, align 4
@static.gv3 = internal addrspace(1) global i32 undef, align 4

; Same control flow as @localize_globals, but in a non-kernel function and with
; internal globals. The assertions test bit 0 of v0 and lower the branch to
; exec-mask updates; each s_getpc_b64 address sequence is expected directly
; before the single store that uses it.
define void @localize_internal_globals(i1 %cond) {
; GFX9-LABEL: localize_internal_globals:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    v_and_b32_e32 v0, 1, v0
; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
; GFX9-NEXT:    s_xor_b64 s[4:5], vcc, -1
; GFX9-NEXT:    s_and_saveexec_b64 s[6:7], s[4:5]
; GFX9-NEXT:    s_xor_b64 s[4:5], exec, s[6:7]
; GFX9-NEXT:    s_cbranch_execnz .LBB2_3
; GFX9-NEXT:  ; %bb.1: ; %Flow
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-NEXT:    s_cbranch_execnz .LBB2_4
; GFX9-NEXT:  .LBB2_2: ; %bb2
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
; GFX9-NEXT:  .LBB2_3: ; %bb1
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv2@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv2@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv3@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv3@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_andn2_saveexec_b64 s[4:5], s[4:5]
; GFX9-NEXT:    s_cbranch_execz .LBB2_2
; GFX9-NEXT:  .LBB2_4: ; %bb0
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv0@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv0@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    global_store_dword v0, v0, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_getpc_b64 s[6:7]
; GFX9-NEXT:    s_add_u32 s6, s6, static.gv1@rel32@lo+4
; GFX9-NEXT:    s_addc_u32 s7, s7, static.gv1@rel32@hi+12
; GFX9-NEXT:    v_mov_b32_e32 v1, 1
; GFX9-NEXT:    global_store_dword v0, v1, s[6:7]
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_or_b64 exec, exec, s[4:5]
; GFX9-NEXT:    s_setpc_b64 s[30:31]
entry:
  br i1 %cond, label %bb0, label %bb1

bb0:
  ; Uses @static.gv0 and @static.gv1 only.
  store volatile i32 0, ptr addrspace(1) @static.gv0
  store volatile i32 1, ptr addrspace(1) @static.gv1
  br label %bb2

bb1:
  ; Uses @static.gv2 and @static.gv3 only.
  store volatile i32 0, ptr addrspace(1) @static.gv2
  store volatile i32 1, ptr addrspace(1) @static.gv3
  br label %bb2

bb2:
  ret void
}

; This would crash from using the wrong insert point
; The body is a volatile load from a null addrspace(1) pointer in %entry,
; followed by an indirect call through a null callee in %bb1. %arg0 is unused.
define void @sink_null_insert_pt(ptr addrspace(4) %arg0) {
; GFX9-LABEL: sink_null_insert_pt:
; GFX9:       ; %bb.0: ; %entry
; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT:    s_mov_b32 s16, s33
; GFX9-NEXT:    s_mov_b32 s33, s32
; GFX9-NEXT:    s_or_saveexec_b64 s[18:19], -1
; GFX9-NEXT:    buffer_store_dword v40, off, s[0:3], s33 ; 4-byte Folded Spill
; GFX9-NEXT:    s_mov_b64 exec, s[18:19]
; GFX9-NEXT:    v_mov_b32_e32 v0, 0
; GFX9-NEXT:    v_mov_b32_e32 v1, 0
; GFX9-NEXT:    global_load_dword v0, v[0:1], off glc
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    v_writelane_b32 v40, s16, 2
; GFX9-NEXT:    v_writelane_b32 v40, s30, 0
; GFX9-NEXT:    s_addk_i32 s32, 0x400
; GFX9-NEXT:    v_writelane_b32 v40, s31, 1
; GFX9-NEXT:    s_swappc_b64 s[30:31], 0
; GFX9-NEXT:    v_readlane_b32 s31, v40, 1
; GFX9-NEXT:    v_readlane_b32 s30, v40, 0
; GFX9-NEXT:    s_mov_b32 s32, s33
; GFX9-NEXT:    v_readlane_b32 s4, v40, 2
; GFX9-NEXT:    s_or_saveexec_b64 s[6:7], -1
; GFX9-NEXT:    buffer_load_dword v40, off, s[0:3], s33 ; 4-byte Folded Reload
; GFX9-NEXT:    s_mov_b64 exec, s[6:7]
; GFX9-NEXT:    s_mov_b32 s33, s4
; GFX9-NEXT:    s_waitcnt vmcnt(0)
; GFX9-NEXT:    s_setpc_b64 s[30:31]
entry:
  ; The null pointer constant is used here, in %entry ...
  %load0 = load volatile i32, ptr addrspace(1) null, align 4
  br label %bb1

bb1:
  ; ... and a null constant is used again as the callee in a different block.
  call void null()
  ret void
}