; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -O1 -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-POSTLINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-simplify-libcall -amdgpu-prelink -amdgpu-enable-ocl-mangling-mismatch-workaround=0 <%s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-PRELINK %s
; RUN: opt -S -passes='default<O1>' -mtriple=amdgcn-- -amdgpu-use-native -amdgpu-prelink < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NATIVE %s

; Tests for the AMDGPU OpenCL library-call simplification pass; covers sincos
; fusion, pow/powr/pown/rootn expansions, fma/mad folding, and the native/half
; function variants under postlink, prelink, and use-native modes.

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos
; GCN-POSTLINK: call fast float @_Z3sinf(
; GCN-POSTLINK: call fast float @_Z3cosf(
; GCN-PRELINK: call fast float @_Z6sincosfPU3AS5f(
; GCN-NATIVE: call fast float @_Z10native_sinf(
; GCN-NATIVE: call fast float @_Z10native_cosf(
define amdgpu_kernel void @test_sincos(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3sinf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  %call2 = call fast float @_Z3cosf(float %tmp)
  %arrayidx3 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  store float %call2, ptr addrspace(1) %arrayidx3, align 4
  ret void
}

declare float @_Z3sinf(float)

declare float @_Z3cosf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v2
; GCN-POSTLINK: call fast <2 x float> @_Z3sinDv2_f(
; GCN-POSTLINK: call fast <2 x float> @_Z3cosDv2_f(
; GCN-PRELINK: call fast <2 x float> @_Z6sincosDv2_fPU3AS5S_(
; GCN-NATIVE: call fast <2 x float> @_Z10native_sinDv2_f(
; GCN-NATIVE: call fast <2 x float> @_Z10native_cosDv2_f(
define amdgpu_kernel void @test_sincos_v2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <2 x float>, ptr addrspace(1) %a, align 8
  %call = call fast <2 x float> @_Z3sinDv2_f(<2 x float> %tmp)
  store <2 x float> %call, ptr addrspace(1) %a, align 8
  %call2 = call fast <2 x float> @_Z3cosDv2_f(<2 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <2 x float>, ptr addrspace(1) %a, i64 1
  store <2 x float> %call2, ptr addrspace(1) %arrayidx3, align 8
  ret void
}

declare <2 x float> @_Z3sinDv2_f(<2 x float>)

declare <2 x float> @_Z3cosDv2_f(<2 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v3
; GCN-POSTLINK: call fast <3 x float> @_Z3sinDv3_f(
; GCN-POSTLINK: call fast <3 x float> @_Z3cosDv3_f(
; GCN-PRELINK: call fast <3 x float> @_Z6sincosDv3_fPU3AS5S_(
; GCN-NATIVE: call fast <3 x float> @_Z10native_sinDv3_f(
; GCN-NATIVE: call fast <3 x float> @_Z10native_cosDv3_f(
define amdgpu_kernel void @test_sincos_v3(ptr addrspace(1) nocapture %a) {
entry:
  %loadVec4 = load <4 x float>, ptr addrspace(1) %a, align 16
  %extractVec4 = shufflevector <4 x float> %loadVec4, <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
  %call = call fast <3 x float> @_Z3sinDv3_f(<3 x float> %extractVec4)
  %extractVec6 = shufflevector <3 x float> %call, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec6, ptr addrspace(1) %a, align 16
  %call11 = call fast <3 x float> @_Z3cosDv3_f(<3 x float> %extractVec4)
  %arrayidx12 = getelementptr inbounds <3 x float>, ptr addrspace(1) %a, i64 1
  %extractVec13 = shufflevector <3 x float> %call11, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
  store <4 x float> %extractVec13, ptr addrspace(1) %arrayidx12, align 16
  ret void
}

declare <3 x float> @_Z3sinDv3_f(<3 x float>)

declare <3 x float> @_Z3cosDv3_f(<3 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v4
; GCN-POSTLINK: call fast <4 x float> @_Z3sinDv4_f(
; GCN-POSTLINK: call fast <4 x float> @_Z3cosDv4_f(
; GCN-PRELINK: call fast <4 x float> @_Z6sincosDv4_fPU3AS5S_(
; GCN-NATIVE: call fast <4 x float> @_Z10native_sinDv4_f(
; GCN-NATIVE: call fast <4 x float> @_Z10native_cosDv4_f(
define amdgpu_kernel void @test_sincos_v4(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <4 x float>, ptr addrspace(1) %a, align 16
  %call = call fast <4 x float> @_Z3sinDv4_f(<4 x float> %tmp)
  store <4 x float> %call, ptr addrspace(1) %a, align 16
  %call2 = call fast <4 x float> @_Z3cosDv4_f(<4 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <4 x float>, ptr addrspace(1) %a, i64 1
  store <4 x float> %call2, ptr addrspace(1) %arrayidx3, align 16
  ret void
}

declare <4 x float> @_Z3sinDv4_f(<4 x float>)

declare <4 x float> @_Z3cosDv4_f(<4 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v8
; GCN-POSTLINK: call fast <8 x float> @_Z3sinDv8_f(
; GCN-POSTLINK: call fast <8 x float> @_Z3cosDv8_f(
; GCN-PRELINK: call fast <8 x float> @_Z6sincosDv8_fPU3AS5S_(
; GCN-NATIVE: call fast <8 x float> @_Z10native_sinDv8_f(
; GCN-NATIVE: call fast <8 x float> @_Z10native_cosDv8_f(
define amdgpu_kernel void @test_sincos_v8(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <8 x float>, ptr addrspace(1) %a, align 32
  %call = call fast <8 x float> @_Z3sinDv8_f(<8 x float> %tmp)
  store <8 x float> %call, ptr addrspace(1) %a, align 32
  %call2 = call fast <8 x float> @_Z3cosDv8_f(<8 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <8 x float>, ptr addrspace(1) %a, i64 1
  store <8 x float> %call2, ptr addrspace(1) %arrayidx3, align 32
  ret void
}

declare <8 x float> @_Z3sinDv8_f(<8 x float>)

declare <8 x float> @_Z3cosDv8_f(<8 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_sincos_v16
; GCN-POSTLINK: call fast <16 x float> @_Z3sinDv16_f(
; GCN-POSTLINK: call fast <16 x float> @_Z3cosDv16_f(
; GCN-PRELINK: call fast <16 x float> @_Z6sincosDv16_fPU3AS5S_(
; GCN-NATIVE: call fast <16 x float> @_Z10native_sinDv16_f(
; GCN-NATIVE: call fast <16 x float> @_Z10native_cosDv16_f(
define amdgpu_kernel void @test_sincos_v16(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load <16 x float>, ptr addrspace(1) %a, align 64
  %call = call fast <16 x float> @_Z3sinDv16_f(<16 x float> %tmp)
  store <16 x float> %call, ptr addrspace(1) %a, align 64
  %call2 = call fast <16 x float> @_Z3cosDv16_f(<16 x float> %tmp)
  %arrayidx3 = getelementptr inbounds <16 x float>, ptr addrspace(1) %a, i64 1
  store <16 x float> %call2, ptr addrspace(1) %arrayidx3, align 64
  ret void
}

declare <16 x float> @_Z3sinDv16_f(<16 x float>)

declare <16 x float> @_Z3cosDv16_f(<16 x float>)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_recip
; GCN: %call = tail call fast float @_Z12native_recipf(float 3.000000e+00)
define amdgpu_kernel void @test_native_recip(ptr addrspace(1) nocapture %a) {
entry:
  %call = call fast float @_Z12native_recipf(float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z12native_recipf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_recip
; GCN: %call = tail call fast float @_Z10half_recipf(float 3.000000e+00)
define amdgpu_kernel void @test_half_recip(ptr addrspace(1) nocapture %a) {
entry:
  %call = call fast float @_Z10half_recipf(float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z10half_recipf(float)

; Do nothing, the underlying implementation will optimize correctly
; after inlining.
; GCN-LABEL: {{^}}define amdgpu_kernel void @test_native_divide
; GCN: %call = tail call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
define amdgpu_kernel void @test_native_divide(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z13native_divideff(float %tmp, float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z13native_divideff(float, float)

; Do nothing, the optimization will naturally happen after inlining.

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_half_divide
; GCN: %call = tail call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
define amdgpu_kernel void @test_half_divide(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z11half_divideff(float %tmp, float 3.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z11half_divideff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0f
; GCN: store float 1.000000e+00, ptr addrspace(1) %a
define amdgpu_kernel void @test_pow_0f(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3powff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_0i
; GCN: store float 1.000000e+00, ptr addrspace(1) %a
define amdgpu_kernel void @test_pow_0i(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1f
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow_1f(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_1i
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow_1i(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2f
; GCN: %tmp = load float, ptr addrspace(1) %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2f(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_2i
; GCN: %tmp = load float, ptr addrspace(1) %a, align 4
; GCN: %__pow2 = fmul fast float %tmp, %tmp
define amdgpu_kernel void @test_pow_2i(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 2.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1f
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1f(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_m1i
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: %__powrecip = fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_pow_m1i(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -1.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_half
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float 5.000000e-01)
; GCN-PRELINK: %__pow2sqrt = tail call fast float @llvm.sqrt.f32(float %tmp)
define amdgpu_kernel void @test_pow_half(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 5.000000e-01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_mhalf
; GCN-POSTLINK: call fast float @_Z3powff(float %tmp, float -5.000000e-01)
; GCN-PRELINK: %__pow2rsqrt = tail call fast float @_Z5rsqrtf(float %tmp)
define amdgpu_kernel void @test_pow_mhalf(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float -5.000000e-01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pow_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.100000e+01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_powr_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z4powrff(float %tmp, float 1.100000e+01)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4powrff(float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown_c
; GCN: %__powx2 = fmul fast float %tmp, %tmp
; GCN: %__powx21 = fmul fast float %__powx2, %__powx2
; GCN: %__powx22 = fmul fast float %__powx2, %tmp
; GCN: %[[r0:.*]] = fmul fast float %__powx21, %__powx21
; GCN: %__powprod3 = fmul fast float %[[r0]], %__powx22
define amdgpu_kernel void @test_pown_c(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z4pownfi(float %tmp, i32 11)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare half @_Z4pownDhi(half, i32)

; GCN-LABEL: {{^}}define half @test_pown_f16(
; GCN-NATIVE: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN-NATIVE: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN-NATIVE: %pownI2F = sitofp i32 %y to half
; GCN-NATIVE: %__ylogx = fmul fast half %__log2, %pownI2F
; GCN-NATIVE: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN-NATIVE: %__ytou = trunc i32 %y to i16
; GCN-NATIVE: %__yeven = shl i16 %__ytou, 15
; GCN-NATIVE: %0 = bitcast half %x to i16
; GCN-NATIVE: %__pow_sign = and i16 %__yeven, %0
; GCN-NATIVE: %1 = bitcast half %__exp2 to i16
; GCN-NATIVE: %2 = or disjoint i16 %__pow_sign, %1
; GCN-NATIVE: %3 = bitcast i16 %2 to half
define half @test_pown_f16(half %x, i32 %y) {
entry:
  %call = call fast half @_Z4pownDhi(half %x, i32 %y)
  ret half %call
}

declare float @_Z4pownfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pow
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %__ylogx = fmul fast float %__log2, 1.013000e+03
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %[[r0:.*]] = tail call float @llvm.copysign.f32(float %__exp2, float %tmp)
; GCN: store float %[[r0]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pow(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3powff(float %tmp, float 1.013000e+03)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_powr
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_powr(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pown
; GCN: %conv = fptosi float %tmp1 to i32
; GCN: %__fabs = tail call fast float @llvm.fabs.f32(float %tmp)
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %__fabs)
; GCN: %pownI2F = sitofp i32 %conv to float
; GCN: %__ylogx = fmul fast float %__log2, %pownI2F
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: %__yeven = shl i32 %conv, 31
; GCN: %[[r0:.*]] = bitcast float %tmp to i32
; GCN: %__pow_sign = and i32 %__yeven, %[[r0]]
; GCN: %[[r1:.*]] = bitcast float %__exp2 to i32
; GCN: %[[r2:.*]] = or disjoint i32 %__pow_sign, %[[r1]]
; GCN: store i32 %[[r2]], ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_pown(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %conv = fptosi float %tmp1 to i32
  %call = call fast float @_Z4pownfi(float %tmp, i32 %conv)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare half @_Z3powDhDh(half, half)
declare <2 x half> @_Z3powDv2_DhS_(<2 x half>, <2 x half>)

; GCN-LABEL: define half @test_pow_fast_f16__y_13(half %x)
; GCN: %__fabs = tail call fast half @llvm.fabs.f16(half %x)
; GCN: %__log2 = tail call fast half @llvm.log2.f16(half %__fabs)
; GCN: %__ylogx = fmul fast half %__log2, 0xH4A80
; GCN: %__exp2 = tail call fast half @llvm.exp2.f16(half %__ylogx)
; GCN: %1 = tail call half @llvm.copysign.f16(half %__exp2, half %x)
define half @test_pow_fast_f16__y_13(half %x) {
  %powr = tail call fast half @_Z3powDhDh(half %x, half 13.0)
  ret half %powr
}

; GCN-LABEL: define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x)
; GCN: %__fabs = tail call fast <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
; GCN: %__log2 = tail call fast <2 x half> @llvm.log2.v2f16(<2 x half> %__fabs)
; GCN: %__ylogx = fmul fast <2 x half> %__log2, splat (half 0xH4A80)
; GCN: %__exp2 = tail call fast <2 x half> @llvm.exp2.v2f16(<2 x half> %__ylogx)
; GCN: %1 = tail call <2 x half> @llvm.copysign.v2f16(<2 x half> %__exp2, <2 x half> %x)
define <2 x half> @test_pow_fast_v2f16__y_13(<2 x half> %x) {
  %powr = tail call fast <2 x half> @_Z3powDv2_DhS_(<2 x half> %x, <2 x half> <half 13.0, half 13.0>)
  ret <2 x half> %powr
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_1
; GCN: %tmp = load float, ptr addrspace(1) %arrayidx, align 4
; GCN: store float %tmp, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_rootn_1(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5rootnfi(float, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_2
; GCN: call fast float @llvm.sqrt.f32(float %tmp)
define amdgpu_kernel void @test_rootn_2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 2)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_3
; GCN-POSTLINK: call fast float @_Z5rootnfi(float %tmp, i32 3)
; GCN-PRELINK: %__rootn2cbrt = tail call fast float @_Z4cbrtf(float %tmp)
define amdgpu_kernel void @test_rootn_3(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 3)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m1
; GCN: fdiv fast float 1.000000e+00, %tmp
define amdgpu_kernel void @test_rootn_m1(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_rootn_m2
; GCN: [[SQRT:%.+]] = tail call fast float @llvm.sqrt.f32(float %tmp)
; GCN-NEXT: fdiv fast float 1.000000e+00, [[SQRT]]
define amdgpu_kernel void @test_rootn_m2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rootnfi(float %tmp, i32 -2)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_0x
; GCN: store float %y
define amdgpu_kernel void @test_fma_0x(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float 0.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3fmafff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x0
; GCN: store float %y,
define amdgpu_kernel void @test_fma_x0(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 0.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_0x
; GCN: store float %y,
define amdgpu_kernel void @test_mad_0x(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3madfff(float 0.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3madfff(float, float, float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_mad_x0
; GCN: store float %y,
define amdgpu_kernel void @test_mad_x0(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3madfff(float %tmp, float 0.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_x1y
; GCN: %call = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_x1y(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float 1.000000e+00, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_1xy
; GCN: %call = fadd fast float %tmp, %y
define amdgpu_kernel void @test_fma_1xy(ptr addrspace(1) nocapture %a, float %y) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float 1.000000e+00, float %tmp, float %y)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_fma_xy0
; GCN: %call = fmul fast float %tmp1, %tmp
define amdgpu_kernel void @test_fma_xy0(ptr addrspace(1) nocapture %a) {
entry:
  %arrayidx = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp = load float, ptr addrspace(1) %arrayidx, align 4
  %tmp1 = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3fmafff(float %tmp, float %tmp1, float 0.000000e+00)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp
; GCN-NATIVE: call fast float @llvm.exp.f32(float %tmp)
define amdgpu_kernel void @test_use_native_exp(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3expf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3expf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp2
; GCN-NATIVE: call fast float @llvm.exp2.f32(float %tmp)
define amdgpu_kernel void @test_use_native_exp2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z4exp2f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4exp2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_exp10
; GCN-NATIVE: call fast float @_Z12native_exp10f(float %tmp)
define amdgpu_kernel void @test_use_native_exp10(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5exp10f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5exp10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log
; GCN-NATIVE: call fast float @llvm.log.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3logf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3logf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log2
; GCN-NATIVE: call fast float @llvm.log2.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log2(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z4log2f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z4log2f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_log10
; GCN-NATIVE: call fast float @llvm.log10.f32(float %tmp)
define amdgpu_kernel void @test_use_native_log10(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5log10f(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5log10f(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr
; GCN: %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
; GCN: %__log2 = tail call fast float @llvm.log2.f32(float %tmp)
; GCN: %__ylogx = fmul fast float %tmp1, %__log2
; GCN: %__exp2 = tail call fast float @llvm.exp2.f32(float %__ylogx)
; GCN: store float %__exp2, ptr addrspace(1) %a, align 4
define amdgpu_kernel void @test_use_native_powr(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_powr_nobuiltin
; GCN: %call = tail call fast float @_Z4powrff(float %tmp, float %tmp1)
define amdgpu_kernel void @test_use_native_powr_nobuiltin(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = load float, ptr addrspace(1) %arrayidx1, align 4
  %call = call fast float @_Z4powrff(float %tmp, float %tmp1) nobuiltin
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sqrt
; GCN-NATIVE: call fast float @llvm.sqrt.f32(float %tmp)
define amdgpu_kernel void @test_use_native_sqrt(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z4sqrtf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64
; GCN: call fast double @llvm.sqrt.f64(double %tmp)
define amdgpu_kernel void @test_dont_use_native_sqrt_fast_f64(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load double, ptr addrspace(1) %a, align 8
  %call = call fast double @_Z4sqrtd(double %tmp)
  store double %call, ptr addrspace(1) %a, align 8
  ret void
}

declare float @_Z4sqrtf(float)
declare double @_Z4sqrtd(double)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_rsqrt
; GCN-NATIVE: call fast float @_Z12native_rsqrtf(float %tmp)
define amdgpu_kernel void @test_use_native_rsqrt(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z5rsqrtf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z5rsqrtf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_tan
; GCN-NATIVE: call fast float @_Z10native_tanf(float %tmp)
define amdgpu_kernel void @test_use_native_tan(ptr addrspace(1) nocapture %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %call = call fast float @_Z3tanf(float %tmp)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z3tanf(float)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_use_native_sincos
; GCN-NATIVE: call float @_Z10native_sinf(float %tmp)
; GCN-NATIVE: call float @_Z10native_cosf(float %tmp)
define amdgpu_kernel void @test_use_native_sincos(ptr addrspace(1) %a) {
entry:
  %tmp = load float, ptr addrspace(1) %a, align 4
  %arrayidx1 = getelementptr inbounds float, ptr addrspace(1) %a, i64 1
  %tmp1 = addrspacecast ptr addrspace(1) %arrayidx1 to ptr
  %call = call fast float @_Z6sincosfPf(float %tmp, ptr %tmp1)
  store float %call, ptr addrspace(1) %a, align 4
  ret void
}

declare float @_Z6sincosfPf(float, ptr)

%opencl.pipe_t = type opaque
%opencl.reserve_id_t = type opaque

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr)
; GCN-PRELINK: call i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND:[0-9]+]]
; GCN-PRELINK: call i32 @__read_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_read_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
entry:
  %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
  %tmp2 = call i32 @__read_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
  %tmp3 = call ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4)
  %tmp4 = call i32 @__read_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
  call void @__commit_read_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4)
  ret void
}

declare i32 @__read_pipe_2(ptr addrspace(1), ptr, i32, i32)

declare ptr addrspace(5) @__reserve_read_pipe(ptr addrspace(1), i32, i32, i32)

declare i32 @__read_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32)

declare void @__commit_read_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32)

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr)
; GCN-PRELINK: call i32 @__write_pipe_2_4(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__write_pipe_4_4(ptr addrspace(1) %{{.*}}, ptr addrspace(5) %{{.*}}, i32 2, ptr %{{.*}}) #[[$NOUNWIND]]
define amdgpu_kernel void @test_write_pipe(ptr addrspace(1) %p, ptr addrspace(1) %ptr) local_unnamed_addr {
entry:
  %tmp1 = addrspacecast ptr addrspace(1) %ptr to ptr
  %tmp2 = call i32 @__write_pipe_2(ptr addrspace(1) %p, ptr %tmp1, i32 4, i32 4) #0
  %tmp3 = call ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1) %p, i32 2, i32 4, i32 4) #0
  %tmp4 = call i32 @__write_pipe_4(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 2, ptr %tmp1, i32 4, i32 4) #0
  call void @__commit_write_pipe(ptr addrspace(1) %p, ptr addrspace(5) %tmp3, i32 4, i32 4) #0
  ret void
}

declare i32 @__write_pipe_2(ptr addrspace(1), ptr, i32, i32) local_unnamed_addr

declare ptr addrspace(5) @__reserve_write_pipe(ptr addrspace(1), i32, i32, i32) local_unnamed_addr

declare i32 @__write_pipe_4(ptr addrspace(1), ptr addrspace(5), i32, ptr, i32, i32) local_unnamed_addr

declare void @__commit_write_pipe(ptr addrspace(1), ptr addrspace(5), i32, i32) local_unnamed_addr

%struct.S = type { [100 x i32] }

; GCN-LABEL: {{^}}define amdgpu_kernel void @test_pipe_size
; GCN-PRELINK: call i32 @__read_pipe_2_1(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_2(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_4(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_8(ptr addrspace(1) %{{.*}} ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_16(ptr addrspace(1) %{{.*}}, ptr %{{.*}}) #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_32(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_64(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2_128(ptr addrspace(1) %{{.*}}, ptr %{{.*}} #[[$NOUNWIND]]
; GCN-PRELINK: call i32 @__read_pipe_2(ptr addrspace(1) %{{.*}}, ptr %{{.*}} i32 400, i32 4) #[[$NOUNWIND]]
define amdgpu_kernel void @test_pipe_size(ptr addrspace(1) %p1, ptr addrspace(1) %ptr1, ptr addrspace(1) %p2, ptr addrspace(1) %ptr2, ptr addrspace(1) %p4, ptr addrspace(1) %ptr4, ptr addrspace(1) %p8, ptr addrspace(1) %ptr8, ptr addrspace(1) %p16, ptr addrspace(1) %ptr16, ptr addrspace(1) %p32, ptr addrspace(1) %ptr32, ptr addrspace(1) %p64, ptr addrspace(1) %ptr64, ptr addrspace(1) %p128, ptr addrspace(1) %ptr128, ptr addrspace(1) %pu, ptr addrspace(1) %ptru) local_unnamed_addr #0 {
entry:
  %tmp = addrspacecast ptr addrspace(1) %ptr1 to ptr
  %tmp1 = call i32 @__read_pipe_2(ptr addrspace(1) %p1, ptr %tmp, i32 1, i32 1) #0
  %tmp3 = addrspacecast ptr addrspace(1) %ptr2 to ptr
  %tmp4 = call i32 @__read_pipe_2(ptr addrspace(1) %p2, ptr %tmp3, i32 2, i32 2) #0
  %tmp6 = addrspacecast ptr addrspace(1) %ptr4 to ptr
  %tmp7 = call i32 @__read_pipe_2(ptr addrspace(1) %p4, ptr %tmp6, i32 4, i32 4) #0
  %tmp9 = addrspacecast ptr addrspace(1) %ptr8 to ptr
  %tmp10 = call i32 @__read_pipe_2(ptr addrspace(1) %p8, ptr %tmp9, i32 8, i32 8) #0
  %tmp12 = addrspacecast ptr addrspace(1) %ptr16 to ptr
  %tmp13 = call i32 @__read_pipe_2(ptr addrspace(1) %p16, ptr %tmp12, i32 16, i32 16) #0
  %tmp15 = addrspacecast ptr addrspace(1) %ptr32 to ptr
  %tmp16 = call i32
@__read_pipe_2(ptr addrspace(1) %p32, ptr %tmp15, i32 32, i32 32) #0 817 %tmp18 = addrspacecast ptr addrspace(1) %ptr64 to ptr 818 %tmp19 = call i32 @__read_pipe_2(ptr addrspace(1) %p64, ptr %tmp18, i32 64, i32 64) #0 819 %tmp21 = addrspacecast ptr addrspace(1) %ptr128 to ptr 820 %tmp22 = call i32 @__read_pipe_2(ptr addrspace(1) %p128, ptr %tmp21, i32 128, i32 128) #0 821 %tmp24 = addrspacecast ptr addrspace(1) %ptru to ptr 822 %tmp25 = call i32 @__read_pipe_2(ptr addrspace(1) %pu, ptr %tmp24, i32 400, i32 4) #0 823 ret void 824} 825 826; GCN-PRELINK: declare float @_Z4cbrtf(float) local_unnamed_addr #[[$NOUNWIND_READONLY:[0-9]+]] 827 828; GCN-PRELINK-DAG: attributes #[[$NOUNWIND]] = { nounwind } 829; GCN-PRELINK-DAG: attributes #[[$NOUNWIND_READONLY]] = { nounwind memory(read) "uniform-work-group-size"="false" } 830attributes #0 = { nounwind } 831