; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s

; Verify that we consider the xor at the end of the waterfall loop emitted for
; divergent indirect addressing as a terminator.
;
; The llc invocation above stops after the regallocfast pass at -O0, so the
; assertions below describe the machine IR as it looks right after fast
; register allocation, including the spill/restore pseudos it inserted.

; NOTE(review): attribute group #1 is referenced here and at the call site but
; no definition of it is visible in this file -- confirm that is intentional.
declare i32 @llvm.amdgcn.workitem.id.x() #1

; There should be no spill code inserted between the xor and the real terminator
;
; The kernel reads the workitem id, adds 1 to it, and uses the result as the
; index of an extractelement from a constant <16 x i32>; the extracted value is
; stored to %out. In the expected output for bb.1 the SI_SPILL_S64_SAVE comes
; before the S_XOR_B64_term on $exec, and S_CBRANCH_EXECNZ follows the xor
; directly.
define amdgpu_kernel void @extract_w_offset_vgpr(ptr addrspace(1) %out) {
  ; GCN-LABEL: name: extract_w_offset_vgpr
  ; GCN: bb.0.entry:
  ; GCN-NEXT:   successors: %bb.1(0x80000000)
  ; GCN-NEXT:   liveins: $vgpr0, $sgpr4_sgpr5
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vgpr_32(s32) = COPY killed $vgpr0
  ; GCN-NEXT:   early-clobber renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM_ec killed renamable $sgpr4_sgpr5, 36, 0 :: (dereferenceable invariant load (s64) from %ir.out.kernarg.offset, align 4, addrspace 4)
  ; GCN-NEXT:   renamable $sgpr6 = COPY renamable $sgpr1
  ; GCN-NEXT:   renamable $sgpr0 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
  ; GCN-NEXT:   renamable $sgpr4 = S_MOV_B32 61440
  ; GCN-NEXT:   renamable $sgpr5 = S_MOV_B32 -1
  ; GCN-NEXT:   undef renamable $sgpr0 = COPY killed renamable $sgpr0, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
  ; GCN-NEXT:   renamable $sgpr1 = COPY killed renamable $sgpr6
  ; GCN-NEXT:   renamable $sgpr2 = COPY killed renamable $sgpr5
  ; GCN-NEXT:   renamable $sgpr3 = COPY killed renamable $sgpr4
  ; GCN-NEXT:   SI_SPILL_S128_SAVE killed $sgpr0_sgpr1_sgpr2_sgpr3, %stack.1, implicit $exec, implicit $sgpr32 :: (store (s128) into %stack.1, align 4, addrspace 5)
  ; GCN-NEXT:   renamable $sgpr0 = S_MOV_B32 16
  ; GCN-NEXT:   renamable $sgpr1 = S_MOV_B32 15
  ; GCN-NEXT:   renamable $sgpr2 = S_MOV_B32 14
  ; GCN-NEXT:   renamable $sgpr3 = S_MOV_B32 13
  ; GCN-NEXT:   renamable $sgpr4 = S_MOV_B32 12
  ; GCN-NEXT:   renamable $sgpr5 = S_MOV_B32 11
  ; GCN-NEXT:   renamable $sgpr6 = S_MOV_B32 10
  ; GCN-NEXT:   renamable $sgpr7 = S_MOV_B32 9
  ; GCN-NEXT:   renamable $sgpr8 = S_MOV_B32 8
  ; GCN-NEXT:   renamable $sgpr9 = S_MOV_B32 7
  ; GCN-NEXT:   renamable $sgpr10 = S_MOV_B32 6
  ; GCN-NEXT:   renamable $sgpr11 = S_MOV_B32 5
  ; GCN-NEXT:   renamable $sgpr12 = S_MOV_B32 3
  ; GCN-NEXT:   renamable $sgpr13 = S_MOV_B32 2
  ; GCN-NEXT:   renamable $sgpr14 = S_MOV_B32 1
  ; GCN-NEXT:   renamable $sgpr15 = S_MOV_B32 0
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr15
  ; GCN-NEXT:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr14
  ; GCN-NEXT:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr13
  ; GCN-NEXT:   [[COPY4:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr12
  ; GCN-NEXT:   [[COPY5:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr11
  ; GCN-NEXT:   [[COPY6:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr10
  ; GCN-NEXT:   [[COPY7:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr9
  ; GCN-NEXT:   [[COPY8:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr8
  ; GCN-NEXT:   [[COPY9:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr7
  ; GCN-NEXT:   [[COPY10:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr6
  ; GCN-NEXT:   [[COPY11:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr5
  ; GCN-NEXT:   [[COPY12:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr4
  ; GCN-NEXT:   [[COPY13:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr3
  ; GCN-NEXT:   [[COPY14:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr2
  ; GCN-NEXT:   [[COPY15:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr1
  ; GCN-NEXT:   [[COPY16:%[0-9]+]]:vgpr_32 = COPY killed renamable $sgpr0
  ; GCN-NEXT:   undef [[COPY17:%[0-9]+]].sub0:vreg_512 = COPY [[COPY1]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub1:vreg_512 = COPY [[COPY2]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub2:vreg_512 = COPY [[COPY3]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub3:vreg_512 = COPY [[COPY4]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub4:vreg_512 = COPY [[COPY5]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub5:vreg_512 = COPY [[COPY6]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub6:vreg_512 = COPY [[COPY7]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub7:vreg_512 = COPY [[COPY8]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub8:vreg_512 = COPY [[COPY9]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub9:vreg_512 = COPY [[COPY10]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub10:vreg_512 = COPY [[COPY11]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub11:vreg_512 = COPY [[COPY12]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub12:vreg_512 = COPY [[COPY13]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub13:vreg_512 = COPY [[COPY14]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub14:vreg_512 = COPY [[COPY15]]
  ; GCN-NEXT:   [[COPY17:%[0-9]+]].sub15:vreg_512 = COPY [[COPY16]]
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
  ; GCN-NEXT:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.0, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.0, align 4, addrspace 5)
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = IMPLICIT_DEF
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.3(0x40000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.2, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.2, align 4, addrspace 5)
  ; GCN-NEXT:   dead [[COPY18:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
  ; GCN-NEXT:   renamable $sgpr2 = V_READFIRSTLANE_B32 [[COPY]](s32), implicit $exec
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = V_CMP_EQ_U32_e64 $sgpr2, [[COPY]](s32), implicit $exec
  ; GCN-NEXT:   renamable $sgpr0_sgpr1 = S_AND_SAVEEXEC_B64 killed renamable $sgpr0_sgpr1, implicit-def $exec, implicit-def dead $scc, implicit $exec
  ; GCN-NEXT:   [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_:%[0-9]+]]:vgpr_32 = V_INDIRECT_REG_READ_GPR_IDX_B32_V16 [[COPY17]], killed $sgpr2, 11, implicit-def $m0, implicit $m0, implicit $exec
  ; GCN-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = COPY [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]]
  ; GCN-NEXT:   renamable $sgpr2_sgpr3 = COPY renamable $sgpr0_sgpr1
  ; GCN-NEXT:   SI_SPILL_S64_SAVE killed $sgpr2_sgpr3, %stack.2, implicit $exec, implicit $sgpr32 :: (store (s64) into %stack.2, align 4, addrspace 5)
  ; GCN-NEXT:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr0_sgpr1, implicit-def dead $scc
  ; GCN-NEXT:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.3:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.0, implicit $exec, implicit $sgpr32 :: (load (s64) from %stack.0, align 4, addrspace 5)
  ; GCN-NEXT:   $exec = S_MOV_B64 renamable $sgpr0_sgpr1
  ; GCN-NEXT: {{ $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   $sgpr0_sgpr1_sgpr2_sgpr3 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr32 :: (load (s128) from %stack.1, align 4, addrspace 5)
  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET [[V_INDIRECT_REG_READ_GPR_IDX_B32_V16_]], killed renamable $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %ir.out.load, addrspace 1)
  ; GCN-NEXT:   S_ENDPGM 0
entry:
  ; The element index is derived from the workitem id (id + 1).
  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
  %index = add i32 %id, 1
  ; The constant vector deliberately-or-not skips the value 4 (..., 3, 5, ...);
  ; the S_MOV_B32 immediates in the expected output (16 down to 5, then 3..0)
  ; mirror exactly these sixteen elements, so do not "fix" one without the other.
  %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
  store i32 %value, ptr addrspace(1) %out
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"amdhsa_code_object_version", i32 500}