1# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py 2# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=fiji -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX8 %s 3# RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer -verify-machineinstrs -o - %s | FileCheck -check-prefix=GFX9 %s 4 5--- | 6 7 define <2 x i16> @and_v2i16(<2 x i16> %a, <2 x i16> %b) #0 { 8 %and = and <2 x i16> %a, %b 9 ret <2 x i16> %and 10 } 11 12 define <3 x i16> @add_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { 13 %add = add <3 x i16> %a, %b 14 ret <3 x i16> %add 15 } 16 17 define <3 x i16> @shl_v3i16(<3 x i16> %a, <3 x i16> %b) #0 { 18 %shl = shl <3 x i16> %a, %b 19 ret <3 x i16> %shl 20 } 21 22 define <4 x half> @fma_v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) { 23 %fma = call <4 x half> @llvm.fma.v4f16(<4 x half> %a, <4 x half> %b, <4 x half> %c) 24 ret <4 x half> %fma 25 } 26 27 define amdgpu_ps <5 x half> @maxnum_v5i16(<5 x half> %a, <5 x half> %b) { 28 %fma = call <5 x half> @llvm.maxnum.v5f16(<5 x half> %a, <5 x half> %b) 29 ret <5 x half> %fma 30 } 31 32 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>) 33 declare <5 x half> @llvm.maxnum.v5f16(<5 x half>, <5 x half>) 34... 35 36--- 37name: and_v2i16 38body: | 39 bb.1: 40 liveins: $vgpr0, $vgpr1 41 42 ; GFX8-LABEL: name: and_v2i16 43 ; GFX8: liveins: $vgpr0, $vgpr1 44 ; GFX8-NEXT: {{ $}} 45 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 46 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 47 ; GFX8-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] 48 ; GFX8-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) 49 ; GFX8-NEXT: SI_RETURN implicit $vgpr0 50 ; 51 ; GFX9-LABEL: name: and_v2i16 52 ; GFX9: liveins: $vgpr0, $vgpr1 53 ; GFX9-NEXT: {{ $}} 54 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 55 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 56 ; GFX9-NEXT: [[AND:%[0-9]+]]:_(<2 x s16>) = G_AND [[COPY]], [[COPY1]] 57 ; GFX9-NEXT: $vgpr0 = COPY [[AND]](<2 x s16>) 58 ; GFX9-NEXT: SI_RETURN implicit $vgpr0 59 %0:_(<2 x s16>) = COPY $vgpr0 60 %1:_(<2 x s16>) = COPY $vgpr1 61 %3:_(<2 x s16>) = G_AND %0, %1 62 $vgpr0 = COPY %3(<2 x s16>) 63 SI_RETURN implicit $vgpr0 64... 65 66--- 67name: add_v3i16 68body: | 69 bb.1: 70 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 71 72 ; GFX8-LABEL: name: add_v3i16 73 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 74 ; GFX8-NEXT: {{ $}} 75 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 76 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 77 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) 78 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) 79 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 80 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 81 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 82 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) 83 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 84 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 85 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 86 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) 87 ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) 88 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 89 ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) 90 ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) 91 ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) 92 ; GFX8-NEXT: [[ADD:%[0-9]+]]:_(s16) = G_ADD [[TRUNC]], [[TRUNC3]] 93 ; GFX8-NEXT: [[ADD1:%[0-9]+]]:_(s16) = G_ADD [[TRUNC1]], [[TRUNC4]] 94 ; GFX8-NEXT: [[ADD2:%[0-9]+]]:_(s16) = G_ADD [[TRUNC2]], [[TRUNC5]] 95 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ADD]](s16) 96 ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ADD1]](s16) 97 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) 98 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] 99 ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 100 ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ADD2]](s16) 101 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 102 ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) 103 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] 104 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 105 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) 106 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) 107 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 108 ; 109 ; GFX9-LABEL: name: add_v3i16 110 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 111 ; GFX9-NEXT: {{ $}} 112 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 113 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 114 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 115 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 116 ; GFX9-NEXT: [[ADD:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY]], [[COPY2]] 117 ; GFX9-NEXT: [[ADD1:%[0-9]+]]:_(<2 x s16>) = G_ADD [[COPY1]], [[COPY3]] 118 ; GFX9-NEXT: $vgpr0 = COPY [[ADD]](<2 x s16>) 119 ; GFX9-NEXT: $vgpr1 = COPY [[ADD1]](<2 x s16>) 120 ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 121 %3:_(<2 x s16>) = COPY $vgpr0 122 %4:_(<2 x s16>) = COPY $vgpr1 123 %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) 124 %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) 125 %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) 126 %10:_(<2 x s16>) = COPY $vgpr2 127 %11:_(<2 x s16>) = COPY $vgpr3 128 %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) 129 %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) 130 %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) 131 %17:_(<3 x s16>) = G_ADD %0, %1 132 %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) 133 %24:_(s16) = G_IMPLICIT_DEF 134 %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) 135 %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) 136 $vgpr0 = COPY %19(<2 x s16>) 137 $vgpr1 = COPY %20(<2 x s16>) 138 SI_RETURN implicit $vgpr0, implicit $vgpr1 139... 140 141--- 142name: shl_v3i16 143body: | 144 bb.1: 145 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 146 147 ; GFX8-LABEL: name: shl_v3i16 148 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 149 ; GFX8-NEXT: {{ $}} 150 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 151 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 152 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) 153 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) 154 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 155 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 156 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 157 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) 158 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 159 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 160 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 161 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) 162 ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) 163 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 164 ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) 165 ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) 166 ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) 167 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC3]](s16) 168 ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC1]], [[TRUNC4]](s16) 169 ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s16) = G_SHL [[TRUNC2]], [[TRUNC5]](s16) 170 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[SHL]](s16) 171 ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[SHL1]](s16) 172 ; GFX8-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) 173 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL3]] 174 ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 175 ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[SHL2]](s16) 176 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 177 ; GFX8-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) 178 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL4]] 179 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 180 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST4]](<2 x s16>) 181 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST5]](<2 x s16>) 182 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 183 ; 184 ; GFX9-LABEL: name: shl_v3i16 185 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 186 ; GFX9-NEXT: {{ $}} 187 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 188 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 189 ; GFX9-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) 190 ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) 191 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 192 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 193 ; GFX9-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) 194 ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 195 ; GFX9-NEXT: [[SHL:%[0-9]+]]:_(<2 x s16>) = G_SHL [[COPY]], [[COPY2]](<2 x s16>) 196 ; GFX9-NEXT: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[TRUNC]], [[TRUNC1]](s16) 197 ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(s16) = G_IMPLICIT_DEF 198 ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SHL1]](s16), [[DEF]](s16) 199 ; GFX9-NEXT: $vgpr0 = COPY [[SHL]](<2 x s16>) 200 ; GFX9-NEXT: $vgpr1 = COPY [[BUILD_VECTOR]](<2 x s16>) 201 ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 202 %3:_(<2 x s16>) = COPY $vgpr0 203 %4:_(<2 x s16>) = COPY $vgpr1 204 %5:_(<4 x s16>) = G_CONCAT_VECTORS %3(<2 x s16>), %4(<2 x s16>) 205 %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16) = G_UNMERGE_VALUES %5(<4 x s16>) 206 %0:_(<3 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16) 207 %10:_(<2 x s16>) = COPY $vgpr2 208 %11:_(<2 x s16>) = COPY $vgpr3 209 %12:_(<4 x s16>) = G_CONCAT_VECTORS %10(<2 x s16>), %11(<2 x s16>) 210 %13:_(s16), %14:_(s16), %15:_(s16), %16:_(s16) = G_UNMERGE_VALUES %12(<4 x s16>) 211 %1:_(<3 x s16>) = G_BUILD_VECTOR %13(s16), %14(s16), %15(s16) 212 %17:_(<3 x s16>) = G_SHL %0, %1(<3 x s16>) 213 %21:_(s16), %22:_(s16), %23:_(s16) = G_UNMERGE_VALUES %17(<3 x s16>) 214 %24:_(s16) = G_IMPLICIT_DEF 215 %25:_(<4 x s16>) = G_BUILD_VECTOR %21(s16), %22(s16), %23(s16), %24(s16) 216 %19:_(<2 x s16>), %20:_(<2 x s16>) = G_UNMERGE_VALUES %25(<4 x s16>) 217 $vgpr0 = COPY %19(<2 x s16>) 218 $vgpr1 = COPY %20(<2 x s16>) 219 SI_RETURN implicit $vgpr0, implicit $vgpr1 220... 221 222--- 223name: fma_v4f16 224body: | 225 bb.1: 226 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 227 228 ; GFX8-LABEL: name: fma_v4f16 229 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 230 ; GFX8-NEXT: {{ $}} 231 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 232 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 233 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 234 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 235 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 236 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 237 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) 238 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) 239 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 240 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 241 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 242 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) 243 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 244 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) 245 ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) 246 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) 247 ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) 248 ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST2]], [[C]](s32) 249 ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) 250 ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) 251 ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) 252 ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) 253 ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) 254 ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) 255 ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) 256 ; GFX8-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) 257 ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR4]](s32) 258 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) 259 ; GFX8-NEXT: [[TRUNC10:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) 260 ; GFX8-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST5]], [[C]](s32) 261 ; GFX8-NEXT: [[TRUNC11:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR5]](s32) 262 ; GFX8-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC4]], [[TRUNC8]] 263 ; GFX8-NEXT: [[FMA1:%[0-9]+]]:_(s16) = G_FMA [[TRUNC1]], [[TRUNC5]], [[TRUNC9]] 264 ; GFX8-NEXT: [[FMA2:%[0-9]+]]:_(s16) = G_FMA [[TRUNC2]], [[TRUNC6]], [[TRUNC10]] 265 ; GFX8-NEXT: [[FMA3:%[0-9]+]]:_(s16) = G_FMA [[TRUNC3]], [[TRUNC7]], [[TRUNC11]] 266 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMA]](s16) 267 ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMA1]](s16) 268 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) 269 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] 270 ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 271 ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMA2]](s16) 272 ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMA3]](s16) 273 ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) 274 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] 275 ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 276 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) 277 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) 278 ; GFX8-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 279 ; 280 ; GFX9-LABEL: name: fma_v4f16 281 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 282 ; GFX9-NEXT: {{ $}} 283 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 284 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 285 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 286 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 287 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 288 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 289 ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY]], [[COPY2]], [[COPY4]] 290 ; GFX9-NEXT: [[FMA1:%[0-9]+]]:_(<2 x s16>) = G_FMA [[COPY1]], [[COPY3]], [[COPY5]] 291 ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](<2 x s16>) 292 ; GFX9-NEXT: $vgpr1 = COPY [[FMA1]](<2 x s16>) 293 ; GFX9-NEXT: SI_RETURN implicit $vgpr0, implicit $vgpr1 294 %4:_(<2 x s16>) = COPY $vgpr0 295 %5:_(<2 x s16>) = COPY $vgpr1 296 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) 297 %6:_(<2 x s16>) = COPY $vgpr2 298 %7:_(<2 x s16>) = COPY $vgpr3 299 %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) 300 %8:_(<2 x s16>) = COPY $vgpr4 301 %9:_(<2 x s16>) = COPY $vgpr5 302 %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) 303 %10:_(<4 x s16>) = G_FMA %0, %1, %2 304 %12:_(<2 x s16>), %13:_(<2 x s16>) = G_UNMERGE_VALUES %10(<4 x s16>) 305 $vgpr0 = COPY %12(<2 x s16>) 306 $vgpr1 = COPY %13(<2 x s16>) 307 SI_RETURN implicit $vgpr0, implicit $vgpr1 308... 309 310--- 311name: maxnum_v5i16 312body: | 313 bb.1: 314 liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 315 316 ; GFX8-LABEL: name: maxnum_v5i16 317 ; GFX8: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 318 ; GFX8-NEXT: {{ $}} 319 ; GFX8-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 320 ; GFX8-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 321 ; GFX8-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 322 ; GFX8-NEXT: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) 323 ; GFX8-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST]](s32) 324 ; GFX8-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 325 ; GFX8-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST]], [[C]](s32) 326 ; GFX8-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) 327 ; GFX8-NEXT: [[BITCAST1:%[0-9]+]]:_(s32) = G_BITCAST [[COPY1]](<2 x s16>) 328 ; GFX8-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST1]](s32) 329 ; GFX8-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST1]], [[C]](s32) 330 ; GFX8-NEXT: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) 331 ; GFX8-NEXT: [[BITCAST2:%[0-9]+]]:_(s32) = G_BITCAST [[COPY2]](<2 x s16>) 332 ; GFX8-NEXT: [[TRUNC4:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST2]](s32) 333 ; GFX8-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 334 ; GFX8-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 335 ; GFX8-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 336 ; GFX8-NEXT: [[BITCAST3:%[0-9]+]]:_(s32) = G_BITCAST [[COPY3]](<2 x s16>) 337 ; GFX8-NEXT: [[TRUNC5:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST3]](s32) 338 ; GFX8-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) 339 ; GFX8-NEXT: [[TRUNC6:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR2]](s32) 340 ; GFX8-NEXT: [[BITCAST4:%[0-9]+]]:_(s32) = G_BITCAST [[COPY4]](<2 x s16>) 341 ; GFX8-NEXT: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST4]](s32) 342 ; GFX8-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST4]], [[C]](s32) 343 ; GFX8-NEXT: [[TRUNC8:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) 344 ; GFX8-NEXT: [[BITCAST5:%[0-9]+]]:_(s32) = G_BITCAST [[COPY5]](<2 x s16>) 345 ; GFX8-NEXT: [[TRUNC9:%[0-9]+]]:_(s16) = G_TRUNC [[BITCAST5]](s32) 346 ; GFX8-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] 347 ; GFX8-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] 348 ; GFX8-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] 349 ; GFX8-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] 350 ; GFX8-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] 351 ; GFX8-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] 352 ; GFX8-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] 353 ; GFX8-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] 354 ; GFX8-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] 355 ; GFX8-NEXT: [[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] 356 ; GFX8-NEXT: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC8]] 357 ; GFX8-NEXT: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] 358 ; GFX8-NEXT: [[FCANONICALIZE8:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] 359 ; GFX8-NEXT: [[FCANONICALIZE9:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC9]] 360 ; GFX8-NEXT: [[FMAXNUM_IEEE4:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE8]], [[FCANONICALIZE9]] 361 ; GFX8-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) 362 ; GFX8-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) 363 ; GFX8-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) 364 ; GFX8-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] 365 ; GFX8-NEXT: [[BITCAST6:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) 366 ; GFX8-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) 367 ; GFX8-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) 368 ; GFX8-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) 369 ; GFX8-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] 370 ; GFX8-NEXT: [[BITCAST7:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) 371 ; GFX8-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE4]](s16) 372 ; GFX8-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 373 ; GFX8-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) 374 ; GFX8-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[ZEXT4]], [[SHL2]] 375 ; GFX8-NEXT: [[BITCAST8:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR2]](s32) 376 ; GFX8-NEXT: $vgpr0 = COPY [[BITCAST6]](<2 x s16>) 377 ; GFX8-NEXT: $vgpr1 = COPY [[BITCAST7]](<2 x s16>) 378 ; GFX8-NEXT: $vgpr2 = COPY [[BITCAST8]](<2 x s16>) 379 ; GFX8-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 380 ; 381 ; GFX9-LABEL: name: maxnum_v5i16 382 ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 383 ; GFX9-NEXT: {{ $}} 384 ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 385 ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 386 ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 387 ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 388 ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 389 ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 390 ; GFX9-NEXT: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] 391 ; GFX9-NEXT: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY3]] 392 ; GFX9-NEXT: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] 393 ; GFX9-NEXT: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] 394 ; GFX9-NEXT: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY4]] 395 ; GFX9-NEXT: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] 396 ; GFX9-NEXT: [[FCANONICALIZE4:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY2]] 397 ; GFX9-NEXT: [[FCANONICALIZE5:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY5]] 398 ; GFX9-NEXT: [[FMAXNUM_IEEE2:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] 399 ; GFX9-NEXT: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) 400 ; GFX9-NEXT: $vgpr1 = COPY [[FMAXNUM_IEEE1]](<2 x s16>) 401 ; GFX9-NEXT: $vgpr2 = COPY [[FMAXNUM_IEEE2]](<2 x s16>) 402 ; GFX9-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 403 %2:_(<2 x s16>) = COPY $vgpr0 404 %3:_(<2 x s16>) = COPY $vgpr1 405 %4:_(<2 x s16>) = COPY $vgpr2 406 %5:_(<6 x s16>) = G_CONCAT_VECTORS %2(<2 x s16>), %3(<2 x s16>), %4(<2 x s16>) 407 %6:_(s16), %7:_(s16), %8:_(s16), %9:_(s16), %10:_(s16), %11:_(s16) = G_UNMERGE_VALUES %5(<6 x s16>) 408 %0:_(<5 x s16>) = G_BUILD_VECTOR %6(s16), %7(s16), %8(s16), %9(s16), %10(s16) 409 %12:_(<2 x s16>) = COPY $vgpr3 410 %13:_(<2 x s16>) = COPY $vgpr4 411 %14:_(<2 x s16>) = COPY $vgpr5 412 %15:_(<6 x s16>) = G_CONCAT_VECTORS %12(<2 x s16>), %13(<2 x s16>), %14(<2 x s16>) 413 %16:_(s16), %17:_(s16), %18:_(s16), %19:_(s16), %20:_(s16), %21:_(s16) = G_UNMERGE_VALUES %15(<6 x s16>) 414 %1:_(<5 x s16>) = G_BUILD_VECTOR %16(s16), %17(s16), %18(s16), %19(s16), %20(s16) 415 %23:_(<5 x s16>) = G_FMAXNUM %0, %1 416 %27:_(s16), %28:_(s16), %29:_(s16), %30:_(s16), %31:_(s16) = G_UNMERGE_VALUES %23(<5 x s16>) 417 %32:_(s16) = G_IMPLICIT_DEF 418 %33:_(<6 x s16>) = G_BUILD_VECTOR %27(s16), %28(s16), %29(s16), %30(s16), %31(s16), %32(s16) 419 %24:_(<2 x s16>), %25:_(<2 x s16>), %26:_(<2 x s16>) = G_UNMERGE_VALUES %33(<6 x s16>) 420 $vgpr0 = COPY %24(<2 x s16>) 421 $vgpr1 = COPY %25(<2 x s16>) 422 $vgpr2 = COPY %26(<2 x s16>) 423 SI_RETURN_TO_EPILOG implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 424... 425