# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s

# Test that we fold the correct element from G_UNMERGE_VALUES into the fma.
# Each function loads a <2 x s32>, unmerges it into %el0/%el1 and uses %el1 in
# the G_FADD/G_FSUB being combined; the checks expect %el1 (never %el0) to feed
# the resulting G_FMA.

# fadd(fmul(a, b), el1): the checks expect a single G_FMA a, b, %el1.
# The instructions carry no per-instruction fast-math flags.
---
name:            test_f32_add_mul
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %6, %el1
    $vgpr0 = COPY %7(s32)
...

# Commuted form: fadd(el1, fmul(a, b)), with the unmerge result as the left
# operand of the G_FADD. The checks expect the same G_FMA a, b, %el1.
# The function mode sets fp32 input/output denormals to false.
---
name:            test_f32_add_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_add_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FADD %el1, %6
    $vgpr0 = COPY %7(s32)
...

# fadd(fpext(fmul(a16, b16)), el1): the s16 multiply is extended to s32 before
# the add. The checks expect both s16 inputs to be extended individually and a
# single G_FMA fpext(a16), fpext(b16), %el1.
# The G_FMUL and G_FADD carry the full set of fast-math flags.
---
name:            test_f16_f32_add_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...

# Commuted form: fadd(el1, fpext(fmul(a16, b16))), with the unmerge result as
# the left operand of the G_FADD. The checks expect the same
# G_FMA fpext(a16), fpext(b16), %el1.
---
name:            test_f16_f32_add_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1

    ; GFX10-LABEL: name: test_f16_f32_add_ext_mul_rhs
    ; GFX10: liveins: $sgpr0, $sgpr1, $vgpr0_vgpr1
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $sgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $sgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $sgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s16) = nnan ninf nsz arcp contract afn reassoc G_FMUL %1, %3
    %9:_(s32) = G_FPEXT %8(s16)
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...

# fadd(fma(a, b, fmul(c, d)), el1): the checks expect two nested G_FMAs,
# G_FMA a, b, (G_FMA c, d, %el1), i.e. %el1 becomes the addend of the inner fma.
# This function has no machineFunctionInfo mode block; all three FP
# instructions carry the full set of fast-math flags.
---
name:            test_f32_add_fma_mul
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %9, %el1
    $vgpr0 = COPY %10(s32)
...

# Commuted form: fadd(el1, fma(a, b, fmul(c, d))), with the unmerge result as
# the left operand of the G_FADD. The checks expect the same nested result,
# G_FMA a, b, (G_FMA c, d, %el1).
---
name:            test_f32_add_fma_mul_rhs
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5

    ; GFX10-LABEL: name: test_f32_add_fma_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4_vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr4_vgpr5
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY2]], [[COPY3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %2:_(s32) = COPY $vgpr2
    %3:_(s32) = COPY $vgpr3
    %ptr:_(p1) = COPY $vgpr4_vgpr5
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMUL %2, %3
    %9:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FMA %0, %1, %8
    %10:_(s32) = nnan ninf nsz arcp contract afn reassoc G_FADD %el1, %9
    $vgpr0 = COPY %10(s32)
...

# fadd(fma(a, b, fpext(fmul(c16, d16))), el1): the inner multiply is s16 and is
# extended before feeding the s32 fma. The checks expect
# G_FMA a, b, (G_FMA fpext(c16), fpext(d16), %el1).
# None of the FP instructions carries per-instruction fast-math flags.
---
name:            test_f16_f32_add_fma_ext_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %0, %1, %11
    %13:_(s32) = G_FADD %12, %el1
    $vgpr0 = COPY %13(s32)
...

# fadd(fpext(fadd(fmul(a16, b16), fmul(c16, d16))), el1): the whole s16
# multiply-add chain is extended to s32 before the final add. The checks expect
# all four s16 inputs to be extended individually and
# G_FMA fpext(a16), fpext(b16), (G_FMA fpext(c16), fpext(d16), %el1).
---
name:            test_f16_f32_add_ext_fma_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s16) = G_TRUNC %0(s32)
    %2:_(s32) = COPY $vgpr1
    %3:_(s16) = G_TRUNC %2(s32)
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %1, %3
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %15, %el1
    $vgpr0 = COPY %16(s32)
...

# Commuted form: fadd(el1, fma(a, b, fpext(fmul(c16, d16)))), with the unmerge
# result as the left operand of the G_FADD. The checks expect
# G_FMA a, b, (G_FMA fpext(c16), fpext(d16), %el1).
---
name:            test_f16_f32_add_fma_ext_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_fma_ext_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s32) = COPY $vgpr3
    %6:_(s32) = COPY $vgpr4
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr5
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s16) = G_FMUL %7, %9
    %11:_(s32) = G_FPEXT %10(s16)
    %12:_(s32) = G_FMA %4, %5, %11
    %13:_(s32) = G_FADD %el1, %12
    $vgpr0 = COPY %13(s32)
...

# Commuted form: fadd(el1, fpext(fadd(fmul(a16, b16), fmul(c16, d16)))), with
# the unmerge result as the left operand of the final G_FADD. The checks expect
# G_FMA fpext(a16), fpext(b16), (G_FMA fpext(c16), fpext(d16), %el1).
---
name:            test_f16_f32_add_ext_fma_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5

    ; GFX10-LABEL: name: test_f16_f32_add_ext_fma_mul_rhs
    ; GFX10: liveins: $vgpr0_vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr0_vgpr1
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr2
    ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr3
    ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr4
    ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr5
    ; GFX10-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[COPY3]](s32)
    ; GFX10-NEXT: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16)
    ; GFX10-NEXT: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16)
    ; GFX10-NEXT: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16)
    ; GFX10-NEXT: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16)
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FPEXT2]], [[FPEXT3]], %el1
    ; GFX10-NEXT: [[FMA1:%[0-9]+]]:_(s32) = G_FMA [[FPEXT]], [[FPEXT1]], [[FMA]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA1]](s32)
    %ptr:_(p1) = COPY $vgpr0_vgpr1
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %4:_(s32) = COPY $vgpr2
    %5:_(s16) = G_TRUNC %4(s32)
    %6:_(s32) = COPY $vgpr3
    %7:_(s16) = G_TRUNC %6(s32)
    %8:_(s32) = COPY $vgpr4
    %9:_(s16) = G_TRUNC %8(s32)
    %10:_(s32) = COPY $vgpr5
    %11:_(s16) = G_TRUNC %10(s32)
    %12:_(s16) = G_FMUL %9, %11
    %13:_(s16) = G_FMUL %5, %7
    %14:_(s16) = G_FADD %13, %12
    %15:_(s32) = G_FPEXT %14(s16)
    %16:_(s32) = G_FADD %el1, %15
    $vgpr0 = COPY %16(s32)
...

# fsub(fmul(a, b), el1): the checks expect the subtrahend to be negated and
# folded as the addend, G_FMA a, b, (G_FNEG %el1).
# The pointer is read from $vgpr2_vgpr3, the 64-bit pair declared in liveins,
# so that it does not alias the scalar inputs in $vgpr0 and $vgpr1.
---
name:            test_f32_sub_mul
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG %el1
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[FNEG]]
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %6, %el1
    $vgpr0 = COPY %7(s32)
...

# fsub(el1, fmul(a, b)): the unmerge result is the minuend. The checks expect
# one multiplicand to be negated instead, G_FMA (G_FNEG a), b, %el1.
---
name:            test_f32_sub_mul_rhs
machineFunctionInfo:
  mode:
    fp32-input-denormals: false
    fp32-output-denormals: false
body:             |
  bb.1:
    liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3

    ; GFX10-LABEL: name: test_f32_sub_mul_rhs
    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2_vgpr3
    ; GFX10-NEXT: {{ $}}
    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
    ; GFX10-NEXT: %ptr:_(p1) = COPY $vgpr2_vgpr3
    ; GFX10-NEXT: %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    ; GFX10-NEXT: %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    ; GFX10-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[FNEG]], [[COPY1]], %el1
    ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
    %0:_(s32) = COPY $vgpr0
    %1:_(s32) = COPY $vgpr1
    %ptr:_(p1) = COPY $vgpr2_vgpr3
    %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
    %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
    %6:_(s32) = G_FMUL %0, %1
    %7:_(s32) = G_FSUB %el1, %6
    $vgpr0 = COPY %7(s32)
...