# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -passes si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s

# Each MIR function in this file materializes a 32-bit constant with
# V_MOV_B32_e32 and uses it as an operand of V_ADD_F16_e64 and/or
# V_ADD_F32_e64. The CHECK lines pin how that constant appears in the
# resulting _e32 adds after si-fold-operands and si-shrink-instructions
# have run (both RUN lines share the same CHECK prefix).
#
# NOTE(review): the IR bodies below do not always mirror the MIR bodies
# (e.g. @add_f32_1.0_multi_f16_use has one half fadd and one float fadd,
# while its MIR has two V_ADD_F16) -- presumably they are placeholders for
# the MIR functions to attach to; none of the CHECK lines match IR.
--- |
  define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile float %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile float %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile float %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f16.add1 = fadd half %f16.val1, 0xH3C00
    %f32.add = fadd float %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile half %f16.add1, ptr addrspace(1) undef
    store volatile float %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH0001
    %f16.add1 = fadd half %f16.val1, 0xH0001
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile half %f16.add1,ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xHFFFE
    %f16.add1 = fadd half %f16.val1, 0xHFFFE
    %f32.add = fadd float %f32.val, 0xffffffffc0000000
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile half %f16.add1, ptr addrspace(1) undef
    store volatile float %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
    %f32.val0 = load volatile float, ptr addrspace(1) undef
    %f32.val1 = load volatile float, ptr addrspace(1) undef
    %f32.val = load volatile float, ptr addrspace(1) undef
    %f32.add0 = fadd float %f32.val0, 1.0
    %f32.add1 = fadd float %f32.val1, 1.0
    store volatile float %f32.add0, ptr addrspace(1) undef
    store volatile float %f32.add1, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile half, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile half %f32.add, ptr addrspace(1) undef
    ret void
  }

  define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
    %f16.val0 = load volatile half, ptr addrspace(1) undef
    %f16.val1 = load volatile half, ptr addrspace(1) undef
    %f32.val = load volatile half, ptr addrspace(1) undef
    %f16.add0 = fadd half %f16.val0, 0xH3C00
    %f32.add = fadd half %f32.val, 1.000000e+00
    store volatile half %f16.add0, ptr addrspace(1) undef
    store volatile half %f32.add, ptr addrspace(1) undef
    ret void
  }

  attributes #0 = { nounwind }

...
---

# f32 1.0 with a single use should be folded as the low 32-bits of a
# literal constant.
# (1065353216 is 0x3f800000, the f32 bit pattern of 1.0; its only use here
# is the src1 operand of an f16 add.)

# CHECK-LABEL: name: add_f32_1.0_one_f16_use
# CHECK: %13:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec

name: add_f32_1.0_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = V_MOV_B32_e32 1065353216, implicit $exec
    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---
# f32 1.0 (1065353216) materialized once and used by two f16 adds. The CHECK
# lines below expect the 32-bit value to be folded into both adds.
# NOTE(review): this comment used to state that the materialized f32 inline
# immediate should not be folded into the f16 operands, which contradicts the
# CHECK lines -- confirm the CHECK lines reflect the intended behavior.

# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, killed %12, implicit $mode, implicit $exec


name: add_f32_1.0_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %13 = V_MOV_B32_e32 1065353216, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# f32 1.0 should be folded into the single f32 use as an inline
# immediate, and folded into the single f16 use as a literal constant.
# (Both forms are printed as the same integer, 1065353216 = 0x3f800000, in
# the CHECK lines below.)

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec

name: add_f32_1.0_one_f32_use_one_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# f32 1.0 (1065353216) with one f32 use and two f16 uses. The CHECK lines
# below expect the value to be folded into all three adds.
# NOTE(review): this comment used to state that the value is not folded as a
# multi-use literal for the f16 cases, which contradicts the CHECK lines --
# confirm the CHECK lines reflect the intended behavior.

# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 1065353216, %12, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec

name: add_f32_1.0_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %14 = V_MOV_B32_e32 1065353216, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---
# Integer constant 1 materialized once and used by two f16 adds. The CHECK
# lines below expect it to be folded into both adds.

# CHECK-LABEL: name: add_i32_1_multi_f16_use
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec


name: add_i32_1_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %13 = V_MOV_B32_e32 1, implicit $exec
    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# Integer constant -2 materialized once and used by two f16 adds and one f32
# add. The CHECK lines below expect it to be folded into all three adds.

# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec

name: add_i32_m2_one_f32_use_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
  - { id: 16, class: vgpr_32 }
  - { id: 17, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %14 = V_MOV_B32_e32 -2, implicit $exec
    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# 15360 is 0x3c00, the f16 bit pattern of 1.0; here it is materialized once
# and used by two f32 adds. The CHECK lines below expect the value 15360 to
# be folded into both adds.
# NOTE(review): the comment that used to sit here described an f32 1.0 test
# with one f32 use and several f16 uses, which is not this test.

# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 15360, %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F32_e32 15360, %12, implicit $mode, implicit $exec

name: add_f16_1.0_multi_f32_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %13 = V_MOV_B32_e32 15360, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# The low 16-bits are an inline immediate, but the high bits are junk.
# (80886784 is 0x04d23c00: the low half 0x3c00 is f16 1.0, the high half is
# non-zero.) The CHECK lines below expect the full 32-bit value to appear in
# both f16 adds.

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
# CHECK: %14:vgpr_32 = V_ADD_F16_e32 80886784, %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 80886784, %12, implicit $mode, implicit $exec

name: add_f16_1.0_other_high_bits_multi_f16_use
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %13 = V_MOV_B32_e32 80886784, implicit $exec
    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...
---

# 305413120 is 0x12343c00: the low half 0x3c00 is f16 1.0 and the high half
# is non-zero. The constant is used by one f32 add and one f16 add; the CHECK
# lines below expect the full 32-bit value to be folded into both.

# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
# CHECK: %14:vgpr_32 = V_ADD_F32_e32 305413120, %11, implicit $mode, implicit $exec
# CHECK: %15:vgpr_32 = V_ADD_F16_e32 305413120, %12, implicit $mode, implicit $exec
name: add_f16_1.0_other_high_bits_use_f16_f32
alignment: 1
exposesReturnsTwice: false
legalized: false
regBankSelected: false
selected: false
tracksRegLiveness: true
registers:
  - { id: 0, class: sreg_64 }
  - { id: 1, class: sreg_32 }
  - { id: 2, class: sgpr_32 }
  - { id: 3, class: vgpr_32 }
  - { id: 4, class: sreg_64 }
  - { id: 5, class: sreg_32 }
  - { id: 6, class: sreg_64 }
  - { id: 7, class: sreg_32 }
  - { id: 8, class: sreg_32 }
  - { id: 9, class: sreg_32 }
  - { id: 10, class: sgpr_128 }
  - { id: 11, class: vgpr_32 }
  - { id: 12, class: vgpr_32 }
  - { id: 13, class: vgpr_32 }
  - { id: 14, class: vgpr_32 }
  - { id: 15, class: vgpr_32 }
frameInfo:
  isFrameAddressTaken: false
  isReturnAddressTaken: false
  hasStackMap: false
  hasPatchPoint: false
  stackSize: 0
  offsetAdjustment: 0
  maxAlignment: 0
  adjustsStack: false
  hasCalls: false
  maxCallFrameSize: 0
  hasOpaqueSPAdjustment: false
  hasVAStart: false
  hasMustTailInVarArgFunc: false
body: |
  bb.0 (%ir-block.0):
    %4 = IMPLICIT_DEF
    %5 = COPY %4.sub1
    %6 = IMPLICIT_DEF
    %7 = COPY %6.sub0
    %8 = S_MOV_B32 61440
    %9 = S_MOV_B32 -1
    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
    %13 = V_MOV_B32_e32 305413120, implicit $exec
    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
    S_ENDPGM 0

...