xref: /llvm-project/llvm/test/CodeGen/AMDGPU/fold-imm-f16-f32.mir (revision 042104985cc37d28db5f22f8bdf582c1108977d8)
1# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -run-pass si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
2# RUN: llc --mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs -passes si-fold-operands,si-shrink-instructions %s -o - | FileCheck %s
3--- |
4  define amdgpu_kernel void @add_f32_1.0_one_f16_use() #0 {
5    %f16.val0 = load volatile half, ptr addrspace(1) undef
6    %f16.val1 = load volatile half, ptr addrspace(1) undef
7    %f32.val = load volatile float, ptr addrspace(1) undef
8    %f16.add0 = fadd half %f16.val0, 0xH3C00
9    %f32.add = fadd float %f32.val, 1.000000e+00
10    store volatile half %f16.add0, ptr addrspace(1) undef
11    store volatile float %f32.add, ptr addrspace(1) undef
12    ret void
13  }
14
15  define amdgpu_kernel void @add_f32_1.0_multi_f16_use() #0 {
16    %f16.val0 = load volatile half, ptr addrspace(1) undef
17    %f16.val1 = load volatile half, ptr addrspace(1) undef
18    %f32.val = load volatile float, ptr addrspace(1) undef
19    %f16.add0 = fadd half %f16.val0, 0xH3C00
20    %f32.add = fadd float %f32.val, 1.000000e+00
21    store volatile half %f16.add0, ptr addrspace(1) undef
22    store volatile float %f32.add, ptr addrspace(1) undef
23    ret void
24  }
25
26  define amdgpu_kernel void @add_f32_1.0_one_f32_use_one_f16_use () #0 {
27    %f16.val0 = load volatile half, ptr addrspace(1) undef
28    %f16.val1 = load volatile half, ptr addrspace(1) undef
29    %f32.val = load volatile float, ptr addrspace(1) undef
30    %f16.add0 = fadd half %f16.val0, 0xH3C00
31    %f32.add = fadd float %f32.val, 1.000000e+00
32    store volatile half %f16.add0, ptr addrspace(1) undef
33    store volatile float %f32.add, ptr addrspace(1) undef
34    ret void
35  }
36
37  define amdgpu_kernel void @add_f32_1.0_one_f32_use_multi_f16_use () #0 {
38    %f16.val0 = load volatile half, ptr addrspace(1) undef
39    %f16.val1 = load volatile half, ptr addrspace(1) undef
40    %f32.val = load volatile float, ptr addrspace(1) undef
41    %f16.add0 = fadd half %f16.val0, 0xH3C00
42    %f16.add1 = fadd half %f16.val1, 0xH3C00
43    %f32.add = fadd float %f32.val, 1.000000e+00
44    store volatile half %f16.add0, ptr addrspace(1) undef
45    store volatile half %f16.add1, ptr addrspace(1) undef
46    store volatile float %f32.add, ptr addrspace(1) undef
47    ret void
48  }
49
50  define amdgpu_kernel void @add_i32_1_multi_f16_use() #0 {
51    %f16.val0 = load volatile half, ptr addrspace(1) undef
52    %f16.val1 = load volatile half, ptr addrspace(1) undef
53    %f16.add0 = fadd half %f16.val0, 0xH0001
54    %f16.add1 = fadd half %f16.val1, 0xH0001
55    store volatile half %f16.add0, ptr addrspace(1) undef
56    store volatile half %f16.add1,ptr addrspace(1) undef
57    ret void
58  }
59
60  define amdgpu_kernel void @add_i32_m2_one_f32_use_multi_f16_use () #0 {
61    %f16.val0 = load volatile half, ptr addrspace(1) undef
62    %f16.val1 = load volatile half, ptr addrspace(1) undef
63    %f32.val = load volatile float, ptr addrspace(1) undef
64    %f16.add0 = fadd half %f16.val0, 0xHFFFE
65    %f16.add1 = fadd half %f16.val1, 0xHFFFE
66    %f32.add = fadd float %f32.val, 0xffffffffc0000000
67    store volatile half %f16.add0, ptr addrspace(1) undef
68    store volatile half %f16.add1, ptr addrspace(1) undef
69    store volatile float %f32.add, ptr addrspace(1) undef
70    ret void
71  }
72
73  define amdgpu_kernel void @add_f16_1.0_multi_f32_use() #0 {
74    %f32.val0 = load volatile float, ptr addrspace(1) undef
75    %f32.val1 = load volatile float, ptr addrspace(1) undef
76    %f32.val = load volatile float, ptr addrspace(1) undef
77    %f32.add0 = fadd float %f32.val0, 1.0
78    %f32.add1 = fadd float %f32.val1, 1.0
79    store volatile float %f32.add0, ptr addrspace(1) undef
80    store volatile float %f32.add1, ptr addrspace(1) undef
81    ret void
82  }
83
84  define amdgpu_kernel void @add_f16_1.0_other_high_bits_multi_f16_use() #0 {
85    %f16.val0 = load volatile half, ptr addrspace(1) undef
86    %f16.val1 = load volatile half, ptr addrspace(1) undef
87    %f32.val = load volatile half, ptr addrspace(1) undef
88    %f16.add0 = fadd half %f16.val0, 0xH3C00
89    %f32.add = fadd half %f32.val, 1.000000e+00
90    store volatile half %f16.add0, ptr addrspace(1) undef
91    store volatile half %f32.add, ptr addrspace(1) undef
92    ret void
93  }
94
95  define amdgpu_kernel void @add_f16_1.0_other_high_bits_use_f16_f32() #0 {
96    %f16.val0 = load volatile half, ptr addrspace(1) undef
97    %f16.val1 = load volatile half, ptr addrspace(1) undef
98    %f32.val = load volatile half, ptr addrspace(1) undef
99    %f16.add0 = fadd half %f16.val0, 0xH3C00
100    %f32.add = fadd half %f32.val, 1.000000e+00
101    store volatile half %f16.add0, ptr addrspace(1) undef
102    store volatile half %f32.add, ptr addrspace(1) undef
103    ret void
104  }
105
106  attributes #0 = { nounwind }
107
108...
109---
110
111# The f32 bit pattern of 1.0 (1065353216 = 0x3F800000) with a single f16 use
112#  should be folded into that use as a 32-bit literal constant.
113
114# CHECK-LABEL: name: add_f32_1.0_one_f16_use
115# CHECK: %13:vgpr_32 = V_ADD_F16_e32  1065353216, killed %11, implicit $mode, implicit $exec
116
117name:            add_f32_1.0_one_f16_use
118alignment:       1
119exposesReturnsTwice: false
120legalized:       false
121regBankSelected: false
122selected:        false
123tracksRegLiveness: true
124registers:
125  - { id: 0, class: sreg_64 }
126  - { id: 1, class: sreg_32 }
127  - { id: 2, class: sgpr_32 }
128  - { id: 3, class: vgpr_32 }
129  - { id: 4, class: sreg_64 }
130  - { id: 5, class: sreg_32 }
131  - { id: 6, class: sreg_64 }
132  - { id: 7, class: sreg_32 }
133  - { id: 8, class: sreg_32 }
134  - { id: 9, class: sreg_32 }
135  - { id: 10, class: sgpr_128 }
136  - { id: 11, class: vgpr_32 }
137  - { id: 12, class: vgpr_32 }
138  - { id: 13, class: vgpr_32 }
139frameInfo:
140  isFrameAddressTaken: false
141  isReturnAddressTaken: false
142  hasStackMap:     false
143  hasPatchPoint:   false
144  stackSize:       0
145  offsetAdjustment: 0
146  maxAlignment:    0
147  adjustsStack:    false
148  hasCalls:        false
149  maxCallFrameSize: 0
150  hasOpaqueSPAdjustment: false
151  hasVAStart:      false
152  hasMustTailInVarArgFunc: false
153body:             |
154  bb.0 (%ir-block.0):
155    %4 = IMPLICIT_DEF
156    %5 = COPY %4.sub1
157    %6 = IMPLICIT_DEF
158    %7 = COPY %6.sub0
159    %8 = S_MOV_B32 61440
160    %9 = S_MOV_B32 -1
161    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
162    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
163    %12 = V_MOV_B32_e32 1065353216, implicit $exec
164    %13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $mode, implicit $exec
165    BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
166    S_ENDPGM 0
167
168...
169---
170# Materialized f32 1.0 (1065353216 = 0x3F800000) with two f16 uses. The CHECK
171# lines below expect it to be folded into both uses as a literal constant.
172
173# CHECK-LABEL: name: add_f32_1.0_multi_f16_use
174# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1065353216, killed %11, implicit $mode, implicit $exec
175# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, killed %12, implicit $mode, implicit $exec
176
177
178name:            add_f32_1.0_multi_f16_use
179alignment:       1
180exposesReturnsTwice: false
181legalized:       false
182regBankSelected: false
183selected:        false
184tracksRegLiveness: true
185registers:
186  - { id: 0, class: sreg_64 }
187  - { id: 1, class: sreg_32 }
188  - { id: 2, class: sgpr_32 }
189  - { id: 3, class: vgpr_32 }
190  - { id: 4, class: sreg_64 }
191  - { id: 5, class: sreg_32 }
192  - { id: 6, class: sreg_64 }
193  - { id: 7, class: sreg_32 }
194  - { id: 8, class: sreg_32 }
195  - { id: 9, class: sreg_32 }
196  - { id: 10, class: sgpr_128 }
197  - { id: 11, class: vgpr_32 }
198  - { id: 12, class: vgpr_32 }
199  - { id: 13, class: vgpr_32 }
200  - { id: 14, class: vgpr_32 }
201  - { id: 15, class: vgpr_32 }
202frameInfo:
203  isFrameAddressTaken: false
204  isReturnAddressTaken: false
205  hasStackMap:     false
206  hasPatchPoint:   false
207  stackSize:       0
208  offsetAdjustment: 0
209  maxAlignment:    0
210  adjustsStack:    false
211  hasCalls:        false
212  maxCallFrameSize: 0
213  hasOpaqueSPAdjustment: false
214  hasVAStart:      false
215  hasMustTailInVarArgFunc: false
216body:             |
217  bb.0 (%ir-block.0):
218    %4 = IMPLICIT_DEF
219    %5 = COPY %4.sub1
220    %6 = IMPLICIT_DEF
221    %7 = COPY %6.sub0
222    %8 = S_MOV_B32 61440
223    %9 = S_MOV_B32 -1
224    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
225    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
226    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
227    %13 = V_MOV_B32_e32 1065353216, implicit $exec
228    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
229    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
230    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
231    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
232    S_ENDPGM 0
233
234...
235---
236
237# f32 1.0 should be folded into the single f32 use as an inline
238#  immediate, and folded into the single f16 use as a literal constant
239
240# CHECK-LABEL: name: add_f32_1.0_one_f32_use_one_f16_use
241# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
242# CHECK: %16:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
243
244name:            add_f32_1.0_one_f32_use_one_f16_use
245alignment:       1
246exposesReturnsTwice: false
247legalized:       false
248regBankSelected: false
249selected:        false
250tracksRegLiveness: true
251registers:
252  - { id: 0, class: sreg_64 }
253  - { id: 1, class: sreg_32 }
254  - { id: 2, class: sgpr_32 }
255  - { id: 3, class: vgpr_32 }
256  - { id: 4, class: sreg_64 }
257  - { id: 5, class: sreg_32 }
258  - { id: 6, class: sreg_64 }
259  - { id: 7, class: sreg_32 }
260  - { id: 8, class: sreg_32 }
261  - { id: 9, class: sreg_32 }
262  - { id: 10, class: sgpr_128 }
263  - { id: 11, class: vgpr_32 }
264  - { id: 12, class: vgpr_32 }
265  - { id: 13, class: vgpr_32 }
266  - { id: 14, class: vgpr_32 }
267  - { id: 15, class: vgpr_32 }
268  - { id: 16, class: vgpr_32 }
269frameInfo:
270  isFrameAddressTaken: false
271  isReturnAddressTaken: false
272  hasStackMap:     false
273  hasPatchPoint:   false
274  stackSize:       0
275  offsetAdjustment: 0
276  maxAlignment:    0
277  adjustsStack:    false
278  hasCalls:        false
279  maxCallFrameSize: 0
280  hasOpaqueSPAdjustment: false
281  hasVAStart:      false
282  hasMustTailInVarArgFunc: false
283body:             |
284  bb.0 (%ir-block.0):
285    %4 = IMPLICIT_DEF
286    %5 = COPY %4.sub1
287    %6 = IMPLICIT_DEF
288    %7 = COPY %6.sub0
289    %8 = S_MOV_B32 61440
290    %9 = S_MOV_B32 -1
291    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
292    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
293    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
294    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
295    %14 = V_MOV_B32_e32 1065353216, implicit $exec
296    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
297    %16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
298    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
299    BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
300    S_ENDPGM 0
301
302...
303---
304
305# f32 1.0 should be folded into the single f32 use as an inline constant; the
306#  CHECK lines below also expect it folded into both f16 uses as a literal.
307
308# CHECK-LABEL: name: add_f32_1.0_one_f32_use_multi_f16_use
309# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1065353216, %11, implicit $mode, implicit $exec
310# CHECK: %16:vgpr_32 = V_ADD_F16_e32 1065353216, %12, implicit $mode, implicit $exec
311# CHECK: %17:vgpr_32 = V_ADD_F32_e32 1065353216, killed %13, implicit $mode, implicit $exec
312
313name:            add_f32_1.0_one_f32_use_multi_f16_use
314alignment:       1
315exposesReturnsTwice: false
316legalized:       false
317regBankSelected: false
318selected:        false
319tracksRegLiveness: true
320registers:
321  - { id: 0, class: sreg_64 }
322  - { id: 1, class: sreg_32 }
323  - { id: 2, class: sgpr_32 }
324  - { id: 3, class: vgpr_32 }
325  - { id: 4, class: sreg_64 }
326  - { id: 5, class: sreg_32 }
327  - { id: 6, class: sreg_64 }
328  - { id: 7, class: sreg_32 }
329  - { id: 8, class: sreg_32 }
330  - { id: 9, class: sreg_32 }
331  - { id: 10, class: sgpr_128 }
332  - { id: 11, class: vgpr_32 }
333  - { id: 12, class: vgpr_32 }
334  - { id: 13, class: vgpr_32 }
335  - { id: 14, class: vgpr_32 }
336  - { id: 15, class: vgpr_32 }
337  - { id: 16, class: vgpr_32 }
338  - { id: 17, class: vgpr_32 }
339frameInfo:
340  isFrameAddressTaken: false
341  isReturnAddressTaken: false
342  hasStackMap:     false
343  hasPatchPoint:   false
344  stackSize:       0
345  offsetAdjustment: 0
346  maxAlignment:    0
347  adjustsStack:    false
348  hasCalls:        false
349  maxCallFrameSize: 0
350  hasOpaqueSPAdjustment: false
351  hasVAStart:      false
352  hasMustTailInVarArgFunc: false
353body:             |
354  bb.0 (%ir-block.0):
355    %4 = IMPLICIT_DEF
356    %5 = COPY %4.sub1
357    %6 = IMPLICIT_DEF
358    %7 = COPY %6.sub0
359    %8 = S_MOV_B32 61440
360    %9 = S_MOV_B32 -1
361    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
362    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
363    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
364    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
365    %14 = V_MOV_B32_e32 1065353216, implicit $exec
366    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
367    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
368    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
369    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
370    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
371    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
372    S_ENDPGM 0
373
374...
375---
376# CHECK-LABEL: name: add_i32_1_multi_f16_use
377# CHECK: %14:vgpr_32 = V_ADD_F16_e32 1, killed %11, implicit $mode, implicit $exec
378# CHECK: %15:vgpr_32 = V_ADD_F16_e32 1, killed %12, implicit $mode, implicit $exec
379
380
381name:            add_i32_1_multi_f16_use
382alignment:       1
383exposesReturnsTwice: false
384legalized:       false
385regBankSelected: false
386selected:        false
387tracksRegLiveness: true
388registers:
389  - { id: 0, class: sreg_64 }
390  - { id: 1, class: sreg_32 }
391  - { id: 2, class: sgpr_32 }
392  - { id: 3, class: vgpr_32 }
393  - { id: 4, class: sreg_64 }
394  - { id: 5, class: sreg_32 }
395  - { id: 6, class: sreg_64 }
396  - { id: 7, class: sreg_32 }
397  - { id: 8, class: sreg_32 }
398  - { id: 9, class: sreg_32 }
399  - { id: 10, class: sgpr_128 }
400  - { id: 11, class: vgpr_32 }
401  - { id: 12, class: vgpr_32 }
402  - { id: 13, class: vgpr_32 }
403  - { id: 14, class: vgpr_32 }
404  - { id: 15, class: vgpr_32 }
405frameInfo:
406  isFrameAddressTaken: false
407  isReturnAddressTaken: false
408  hasStackMap:     false
409  hasPatchPoint:   false
410  stackSize:       0
411  offsetAdjustment: 0
412  maxAlignment:    0
413  adjustsStack:    false
414  hasCalls:        false
415  maxCallFrameSize: 0
416  hasOpaqueSPAdjustment: false
417  hasVAStart:      false
418  hasMustTailInVarArgFunc: false
419body:             |
420  bb.0 (%ir-block.0):
421    %4 = IMPLICIT_DEF
422    %5 = COPY %4.sub1
423    %6 = IMPLICIT_DEF
424    %7 = COPY %6.sub0
425    %8 = S_MOV_B32 61440
426    %9 = S_MOV_B32 -1
427    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
428    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
429    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
430    %13 = V_MOV_B32_e32 1, implicit $exec
431    %14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $mode, implicit $exec
432    %15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $mode, implicit $exec
433    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
434    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
435    S_ENDPGM 0
436
437...
438---
439
440# CHECK-LABEL: name: add_i32_m2_one_f32_use_multi_f16_use
441# CHECK: %15:vgpr_32 = V_ADD_F16_e32 -2, %11, implicit $mode, implicit $exec
442# CHECK: %16:vgpr_32 = V_ADD_F16_e32 -2, %12, implicit $mode, implicit $exec
443# CHECK: %17:vgpr_32 = V_ADD_F32_e32 -2, killed %13, implicit $mode, implicit $exec
444
445name:            add_i32_m2_one_f32_use_multi_f16_use
446alignment:       1
447exposesReturnsTwice: false
448legalized:       false
449regBankSelected: false
450selected:        false
451tracksRegLiveness: true
452registers:
453  - { id: 0, class: sreg_64 }
454  - { id: 1, class: sreg_32 }
455  - { id: 2, class: sgpr_32 }
456  - { id: 3, class: vgpr_32 }
457  - { id: 4, class: sreg_64 }
458  - { id: 5, class: sreg_32 }
459  - { id: 6, class: sreg_64 }
460  - { id: 7, class: sreg_32 }
461  - { id: 8, class: sreg_32 }
462  - { id: 9, class: sreg_32 }
463  - { id: 10, class: sgpr_128 }
464  - { id: 11, class: vgpr_32 }
465  - { id: 12, class: vgpr_32 }
466  - { id: 13, class: vgpr_32 }
467  - { id: 14, class: vgpr_32 }
468  - { id: 15, class: vgpr_32 }
469  - { id: 16, class: vgpr_32 }
470  - { id: 17, class: vgpr_32 }
471frameInfo:
472  isFrameAddressTaken: false
473  isReturnAddressTaken: false
474  hasStackMap:     false
475  hasPatchPoint:   false
476  stackSize:       0
477  offsetAdjustment: 0
478  maxAlignment:    0
479  adjustsStack:    false
480  hasCalls:        false
481  maxCallFrameSize: 0
482  hasOpaqueSPAdjustment: false
483  hasVAStart:      false
484  hasMustTailInVarArgFunc: false
485body:             |
486  bb.0 (%ir-block.0):
487    %4 = IMPLICIT_DEF
488    %5 = COPY %4.sub1
489    %6 = IMPLICIT_DEF
490    %7 = COPY %6.sub0
491    %8 = S_MOV_B32 61440
492    %9 = S_MOV_B32 -1
493    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
494    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
495    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
496    %13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
497    %14 = V_MOV_B32_e32 -2, implicit $exec
498    %15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $mode, implicit $exec
499    %16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $mode, implicit $exec
500    %17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $mode, implicit $exec
501    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
502    BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
503    BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
504    S_ENDPGM 0
505
506...
507---
508
509# The f16 bit pattern of 1.0 (15360 = 0x3C00) with two f32 uses. The CHECK lines
510#  below expect the 32-bit value 15360 to be folded into both V_ADD_F32 uses.
511
512# CHECK-LABEL: name: add_f16_1.0_multi_f32_use
513# CHECK: %14:vgpr_32 = V_ADD_F32_e32 15360, %11, implicit $mode, implicit $exec
514# CHECK: %15:vgpr_32 = V_ADD_F32_e32 15360, %12, implicit $mode, implicit $exec
515
516name:            add_f16_1.0_multi_f32_use
517alignment:       1
518exposesReturnsTwice: false
519legalized:       false
520regBankSelected: false
521selected:        false
522tracksRegLiveness: true
523registers:
524  - { id: 0, class: sreg_64 }
525  - { id: 1, class: sreg_32 }
526  - { id: 2, class: sgpr_32 }
527  - { id: 3, class: vgpr_32 }
528  - { id: 4, class: sreg_64 }
529  - { id: 5, class: sreg_32 }
530  - { id: 6, class: sreg_64 }
531  - { id: 7, class: sreg_32 }
532  - { id: 8, class: sreg_32 }
533  - { id: 9, class: sreg_32 }
534  - { id: 10, class: sgpr_128 }
535  - { id: 11, class: vgpr_32 }
536  - { id: 12, class: vgpr_32 }
537  - { id: 13, class: vgpr_32 }
538  - { id: 14, class: vgpr_32 }
539  - { id: 15, class: vgpr_32 }
540frameInfo:
541  isFrameAddressTaken: false
542  isReturnAddressTaken: false
543  hasStackMap:     false
544  hasPatchPoint:   false
545  stackSize:       0
546  offsetAdjustment: 0
547  maxAlignment:    0
548  adjustsStack:    false
549  hasCalls:        false
550  maxCallFrameSize: 0
551  hasOpaqueSPAdjustment: false
552  hasVAStart:      false
553  hasMustTailInVarArgFunc: false
554body:             |
555  bb.0 (%ir-block.0):
556    %4 = IMPLICIT_DEF
557    %5 = COPY %4.sub1
558    %6 = IMPLICIT_DEF
559    %7 = COPY %6.sub0
560    %8 = S_MOV_B32 61440
561    %9 = S_MOV_B32 -1
562    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
563    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
564    %12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
565    %13 = V_MOV_B32_e32 15360, implicit $exec
566    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
567    %15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
568    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
569    BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
570    S_ENDPGM 0
571
572...
573---
574
575# The low 16 bits (0x3C00, f16 1.0) are an inline immediate, but the high 16 bits (0x04D2) are junk: 80886784 = 0x04D23C00.
576
577# CHECK-LABEL: name: add_f16_1.0_other_high_bits_multi_f16_use
578# CHECK: %14:vgpr_32 = V_ADD_F16_e32 80886784, %11, implicit $mode, implicit $exec
579# CHECK: %15:vgpr_32 = V_ADD_F16_e32 80886784, %12, implicit $mode, implicit $exec
580
581name:            add_f16_1.0_other_high_bits_multi_f16_use
582alignment:       1
583exposesReturnsTwice: false
584legalized:       false
585regBankSelected: false
586selected:        false
587tracksRegLiveness: true
588registers:
589  - { id: 0, class: sreg_64 }
590  - { id: 1, class: sreg_32 }
591  - { id: 2, class: sgpr_32 }
592  - { id: 3, class: vgpr_32 }
593  - { id: 4, class: sreg_64 }
594  - { id: 5, class: sreg_32 }
595  - { id: 6, class: sreg_64 }
596  - { id: 7, class: sreg_32 }
597  - { id: 8, class: sreg_32 }
598  - { id: 9, class: sreg_32 }
599  - { id: 10, class: sgpr_128 }
600  - { id: 11, class: vgpr_32 }
601  - { id: 12, class: vgpr_32 }
602  - { id: 13, class: vgpr_32 }
603  - { id: 14, class: vgpr_32 }
604  - { id: 15, class: vgpr_32 }
605frameInfo:
606  isFrameAddressTaken: false
607  isReturnAddressTaken: false
608  hasStackMap:     false
609  hasPatchPoint:   false
610  stackSize:       0
611  offsetAdjustment: 0
612  maxAlignment:    0
613  adjustsStack:    false
614  hasCalls:        false
615  maxCallFrameSize: 0
616  hasOpaqueSPAdjustment: false
617  hasVAStart:      false
618  hasMustTailInVarArgFunc: false
619body:             |
620  bb.0 (%ir-block.0):
621    %4 = IMPLICIT_DEF
622    %5 = COPY %4.sub1
623    %6 = IMPLICIT_DEF
624    %7 = COPY %6.sub0
625    %8 = S_MOV_B32 61440
626    %9 = S_MOV_B32 -1
627    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
628    %11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
629    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
630    %13 = V_MOV_B32_e32 80886784, implicit $exec
631    %14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
632    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
633    BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
634    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
635    S_ENDPGM 0
636
637...
638---
639
640# CHECK-LABEL: name: add_f16_1.0_other_high_bits_use_f16_f32
641# CHECK: %14:vgpr_32 = V_ADD_F32_e32 305413120, %11, implicit $mode, implicit $exec
642# CHECK: %15:vgpr_32 = V_ADD_F16_e32 305413120, %12, implicit $mode, implicit $exec
643name:            add_f16_1.0_other_high_bits_use_f16_f32
644alignment:       1
645exposesReturnsTwice: false
646legalized:       false
647regBankSelected: false
648selected:        false
649tracksRegLiveness: true
650registers:
651  - { id: 0, class: sreg_64 }
652  - { id: 1, class: sreg_32 }
653  - { id: 2, class: sgpr_32 }
654  - { id: 3, class: vgpr_32 }
655  - { id: 4, class: sreg_64 }
656  - { id: 5, class: sreg_32 }
657  - { id: 6, class: sreg_64 }
658  - { id: 7, class: sreg_32 }
659  - { id: 8, class: sreg_32 }
660  - { id: 9, class: sreg_32 }
661  - { id: 10, class: sgpr_128 }
662  - { id: 11, class: vgpr_32 }
663  - { id: 12, class: vgpr_32 }
664  - { id: 13, class: vgpr_32 }
665  - { id: 14, class: vgpr_32 }
666  - { id: 15, class: vgpr_32 }
667frameInfo:
668  isFrameAddressTaken: false
669  isReturnAddressTaken: false
670  hasStackMap:     false
671  hasPatchPoint:   false
672  stackSize:       0
673  offsetAdjustment: 0
674  maxAlignment:    0
675  adjustsStack:    false
676  hasCalls:        false
677  maxCallFrameSize: 0
678  hasOpaqueSPAdjustment: false
679  hasVAStart:      false
680  hasMustTailInVarArgFunc: false
681body:             |
682  bb.0 (%ir-block.0):
683    %4 = IMPLICIT_DEF
684    %5 = COPY %4.sub1
685    %6 = IMPLICIT_DEF
686    %7 = COPY %6.sub0
687    %8 = S_MOV_B32 61440
688    %9 = S_MOV_B32 -1
689    %10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
690    %11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s32) from `ptr addrspace(1) undef`)
691    %12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, implicit $exec :: (volatile load (s16) from `ptr addrspace(1) undef`)
692    %13 = V_MOV_B32_e32 305413120, implicit $exec
693    %14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $mode, implicit $exec
694    %15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $mode, implicit $exec
695    BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`)
696    BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, implicit $exec :: (volatile store (s16) into `ptr addrspace(1) undef`)
697    S_ENDPGM 0
698
699...
700