; RUN: llc -mtriple=amdgcn -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s
; RUN: llc -mtriple=amdgcn -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -enable-new-pm -stop-after=amdgpu-isel < %s | FileCheck -check-prefixes=GCN,FP16 %s

; Verifies divergence-driven instruction selection of fneg, fabs and
; fneg(fabs) as sign-bit integer ops. Uniform values must select to scalar
; S_XOR_B32 / S_AND_B32 / S_OR_B32, divergent values (indexed by workitem id)
; to VALU V_XOR_B32 / V_AND_B32 / V_OR_B32. The materialized masks are
;   f32 and the f64 high dword - 0x80000000 (-2147483648), 0x7fffffff (2147483647)
;   f16                        - 0x8000 (32768), 0x7fff (32767)
;   v2f16 packed               - 0x80008000 (-2147450880), 0x7fff7fff (2147450879)
; For f64 only the high 32 bits of the loaded value are modified and the
; result is rebuilt with REG_SEQUENCE.

define amdgpu_kernel void @divergent_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fneg = fneg float %val
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  store float %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds float, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds float, ptr addrspace(1) %out, i64 %idx
  %val = load volatile float, ptr addrspace(1) %in.gep
  %fabs = call float @llvm.fabs.f32(float %val)
  %fneg = fneg float %fabs
  store float %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32767
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  store half %fabs, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fneg = fneg half %val
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: name: divergent_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %tid.ext
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f16(ptr addrspace(1) %in, ptr addrspace(1) %out, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 32768
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %in.gep = getelementptr inbounds half, ptr addrspace(1) %in, i64 %idx
  %val = load volatile half, ptr addrspace(1) %in.gep
  %fabs = call half @llvm.fabs.f16(half %val)
  %fneg = fneg half %fabs
  store half %fneg, ptr addrspace(1) %out
  ret void
}

define amdgpu_kernel void @divergent_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_XOR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_XOR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fneg = fneg <2 x half> %val
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; FP16: V_AND_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147450879
; GCN: S_AND_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  store <2 x half> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; FP16: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; FP16: V_OR_B32_e64 killed %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f16
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147450880
; GCN: S_OR_B32 killed %{{[0-9]+}}, killed %[[REG]]

  %gep.in = getelementptr inbounds <2 x half>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x half>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x half>, ptr addrspace(1) %gep.in, align 2
  %fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %val)
  %fneg = fneg <2 x half> %fabs
  store <2 x half> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_XOR_B32_e64 %[[REG]]
; GCN: V_XOR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_XOR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fneg = fneg <2 x float> %val
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: V_AND_B32_e64 %[[REG]]
; GCN: V_AND_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_AND_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  store <2 x float> %fabs, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: V_OR_B32_e64 %[[REG]]
; GCN: V_OR_B32_e64 %[[REG]]

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %tid
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %tid
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in, i32 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_v2f32
; GCN-LABEL: bb.0 (%ir-block.0)
; GCN: %[[REG:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]
; GCN: S_OR_B32 killed %{{[0-9]+}}, %[[REG]]

  %gep.in = getelementptr inbounds <2 x float>, ptr addrspace(1) %in, i32 %idx
  %gep.out = getelementptr inbounds <2 x float>, ptr addrspace(1) %out, i32 %idx
  %val = load <2 x float>, ptr addrspace(1) %gep.in, align 4
  %fabs = call <2 x float> @llvm.fabs.v2f32(<2 x float> %val)
  %fneg = fneg <2 x float> %fabs
  store <2 x float> %fneg, ptr addrspace(1) %gep.out
  ret void
}

define amdgpu_kernel void @divergent_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:vgpr_32 = V_XOR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[XOR:[0-9]+]]:sreg_32 = S_XOR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[XOR_COPY:[0-9]+]]:sreg_32 = COPY %[[XOR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[XOR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fneg = fneg double %val
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:vgpr_32 = V_AND_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
; GCN: %[[AND:[0-9]+]]:sreg_32 = S_AND_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[AND_COPY:[0-9]+]]:sreg_32 = COPY %[[AND]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[AND_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  store double %fabs, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @divergent_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in) {
; GCN-LABEL: name: divergent_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[HI32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:vgpr_32 = V_OR_B32_e64 killed %[[SREG_MASK]], killed %[[HI32]]
; GCN: %[[LO32:[0-9]+]]:vgpr_32 = COPY %[[VREG64]].sub0
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR]], %subreg.sub1

  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %tid.ext
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %tid.ext
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

define amdgpu_kernel void @uniform_fneg_fabs_f64(ptr addrspace(1) %out, ptr addrspace(1) %in, i64 %idx) {
; GCN-LABEL: name: uniform_fneg_fabs_f64
; GCN-LABEL: bb.0 (%ir-block.0)
; SI: %[[VREG64:[0-9]+]]:vreg_64 = BUFFER_LOAD_DWORDX2_ADDR64
; FP16: %[[VREG64:[0-9]+]]:vreg_64 = GLOBAL_LOAD_DWORDX2_SADDR
; GCN: %[[LO32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub0
; GCN: %[[HI32:[0-9]+]]:sreg_32 = COPY %[[VREG64]].sub1
; GCN: %[[SREG_MASK:[0-9]+]]:sreg_32 = S_MOV_B32 -2147483648
; GCN: %[[OR:[0-9]+]]:sreg_32 = S_OR_B32 killed %[[HI32]], killed %[[SREG_MASK]]
; GCN: %[[OR_COPY:[0-9]+]]:sreg_32 = COPY %[[OR]]
; GCN: REG_SEQUENCE killed %[[LO32]], %subreg.sub0, killed %[[OR_COPY]], %subreg.sub1

  %in.gep = getelementptr inbounds double, ptr addrspace(1) %in, i64 %idx
  %out.gep = getelementptr inbounds double, ptr addrspace(1) %out, i64 %idx
  %val = load volatile double, ptr addrspace(1) %in.gep
  %fabs = call double @llvm.fabs.f64(double %val)
  %fneg = fneg double %fabs
  store double %fneg, ptr addrspace(1) %out.gep
  ret void
}

declare float @llvm.fabs.f32(float)
declare half @llvm.fabs.f16(half)
declare double @llvm.fabs.f64(double)
declare <2 x half> @llvm.fabs.v2f16(<2 x half>)
declare <2 x float> @llvm.fabs.v2f32(<2 x float>)

declare i32 @llvm.amdgcn.workitem.id.x()