# RUN: llc -mtriple=amdgcn-- -run-pass=print-machine-uniformity -o - %s 2>&1 | FileCheck %s

---
name: test1
tracksRegLiveness: true
body: |
  bb.1:
    %2:_(s32) = IMPLICIT_DEF
    %3:_(s32) = IMPLICIT_DEF
    %0:_(p0) = G_MERGE_VALUES %2(s32), %3(s32)
    %1:_(s32) = IMPLICIT_DEF

    ; CHECK: DIVERGENT
    ; CHECK-SAME: G_ATOMICRMW_XCHG
    %4:_(s32) = G_ATOMICRMW_XCHG %0(p0), %1 :: (load store seq_cst (s32))

    ; CHECK: DIVERGENT
    ; CHECK-SAME: G_ATOMIC_CMPXCHG_WITH_SUCCESS
    %5:_(s32), %6:_(s1) = G_ATOMIC_CMPXCHG_WITH_SUCCESS %0(p0), %1, %2 :: (load store seq_cst seq_cst (s32))
    $vgpr0 = COPY %4(s32)
    SI_RETURN implicit $vgpr0
...

---
name: test_atomics
tracksRegLiveness: true
body: |
  bb.1:

    %2:_(s32) = IMPLICIT_DEF
    %3:_(s32) = IMPLICIT_DEF
    %0:_(p1) = G_MERGE_VALUES %2(s32), %3(s32)
    %1:_(s32) = IMPLICIT_DEF
    %5:_(s32) = IMPLICIT_DEF

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_ADD
    %4:_(s32) = G_ATOMICRMW_ADD %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_SUB
    %6:_(s32) = G_ATOMICRMW_SUB %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_AND
    %7:_(s32) = G_ATOMICRMW_AND %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_NAND
    %8:_(s32) = G_ATOMICRMW_NAND %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_OR
    %9:_(s32) = G_ATOMICRMW_OR %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_XOR
    %10:_(s32) = G_ATOMICRMW_XOR %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_MAX
    %11:_(s32) = G_ATOMICRMW_MAX %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_MIN
    %12:_(s32) = G_ATOMICRMW_MIN %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_UMAX
    %13:_(s32) = G_ATOMICRMW_UMAX %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_UMIN
    %14:_(s32) = G_ATOMICRMW_UMIN %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FADD
    %15:_(s32) = G_ATOMICRMW_FADD %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FSUB
    %16:_(s32) = G_ATOMICRMW_FSUB %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FMAX
    %17:_(s32) = G_ATOMICRMW_FMAX %2, %3

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FMIN
    %18:_(s32) = G_ATOMICRMW_FMIN %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_UINC_WRAP
    %19:_(s32) = G_ATOMICRMW_UINC_WRAP %1, %5

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_UDEC_WRAP
    %20:_(s32) = G_ATOMICRMW_UDEC_WRAP %1, %5

    $vgpr0 = COPY %4(s32)
    SI_RETURN implicit $vgpr0

...
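
# The function below exercises the AMDGPU buffer atomics. As its name
# suggests, their results are expected to be reported as divergent even
# though every operand here is uniform (SGPR copies, a constant, and
# implicit defs).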

---
name: test_buffer_atomics_always_divergent
tracksRegLiveness: true
body: |
  bb.1:
    liveins: $sgpr0, $sgpr2_sgpr3, $sgpr4_sgpr5_sgpr6_sgpr7

    %0:_(s32) = COPY $sgpr0
    %1:sgpr(p0) = COPY $sgpr2_sgpr3
    %2:_(s32) = IMPLICIT_DEF
    %3:_(<4 x s32>) = COPY $sgpr4_sgpr5_sgpr6_sgpr7
    %4:_(s32) = G_CONSTANT i32 0
    %ptr_lds:_(p3) = G_IMPLICIT_DEF

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FMIN
    %5:_(s32) = G_ATOMICRMW_FMIN %ptr_lds, %4

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_ATOMICRMW_FMAX
    %6:_(s32) = G_ATOMICRMW_FMAX %ptr_lds, %4

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP
    %7:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SWAP %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD
    %8:_(s32) = G_AMDGPU_BUFFER_ATOMIC_ADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB
    %9:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SUB %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN
    %10:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN
    %11:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX
    %12:_(s32) = G_AMDGPU_BUFFER_ATOMIC_SMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX
    %13:_(s32) = G_AMDGPU_BUFFER_ATOMIC_UMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND
    %14:_(s32) = G_AMDGPU_BUFFER_ATOMIC_AND %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR
    %15:_(s32) = G_AMDGPU_BUFFER_ATOMIC_OR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR
    %16:_(s32) = G_AMDGPU_BUFFER_ATOMIC_XOR %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC
    %17:_(s32) = G_AMDGPU_BUFFER_ATOMIC_INC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC
    %18:_(s32) = G_AMDGPU_BUFFER_ATOMIC_DEC %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD
    %19:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FADD %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

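    ; The remaining cases cover the floating-point min/max buffer atomics,
    ; the buffer compare-and-swap, and G_AMDGPU_ATOMIC_CMPXCHG on a flat
    ; pointer.
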
    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN
    %20:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMIN %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX
    %21:_(s32) = G_AMDGPU_BUFFER_ATOMIC_FMAX %0, %3, %4, %4, %4, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT: %{{[0-9]*}}: %{{[0-9]*}}:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP
    %22:_(s32) = G_AMDGPU_BUFFER_ATOMIC_CMPSWAP %0, %4, %3, %2, %2, %2, 0, 0, 0 :: (volatile dereferenceable load store (s32), align 1, addrspace 8)

    ; CHECK: DIVERGENT
    ; CHECK-SAME: G_AMDGPU_ATOMIC_CMPXCHG
    %23:vgpr(s32) = G_AMDGPU_ATOMIC_CMPXCHG %1, %4 :: (load store seq_cst (s32), addrspace 0)

...